diff --git a/umn/source/_static/images/en-us_image_0000001223473845.png b/umn/source/_static/images/en-us_image_0000001082048529.png similarity index 100% rename from umn/source/_static/images/en-us_image_0000001223473845.png rename to umn/source/_static/images/en-us_image_0000001082048529.png diff --git a/umn/source/_static/images/en-us_image_0000001113962636.png b/umn/source/_static/images/en-us_image_0000001113962636.png new file mode 100644 index 0000000..7c35ae0 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001113962636.png differ diff --git a/umn/source/_static/images/en-us_image_0000001145545261.png b/umn/source/_static/images/en-us_image_0000001145545261.png new file mode 100644 index 0000000..5ddb8e3 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001145545261.png differ diff --git a/umn/source/_static/images/en-us_image_0000001160642447.png b/umn/source/_static/images/en-us_image_0000001160642447.png new file mode 100644 index 0000000..1be0aa0 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001160642447.png differ diff --git a/umn/source/_static/images/en-us_image_0000001171703840.png b/umn/source/_static/images/en-us_image_0000001171703840.png new file mode 100644 index 0000000..ddd83bb Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001171703840.png differ diff --git a/umn/source/_static/images/en-us_image_0000001172392670.png b/umn/source/_static/images/en-us_image_0000001172392670.png new file mode 100644 index 0000000..05cb59e Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001172392670.png differ diff --git a/umn/source/_static/images/en-us_image_0000001176818150.png b/umn/source/_static/images/en-us_image_0000001176818150.png new file mode 100644 index 0000000..efdf9ff Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001176818150.png differ diff --git a/umn/source/_static/images/en-us_image_0000001178034114.png b/umn/source/_static/images/en-us_image_0000001178034114.png deleted file mode 100644 index d7b0553..0000000 Binary files a/umn/source/_static/images/en-us_image_0000001178034114.png and /dev/null differ diff --git a/umn/source/_static/images/en-us_image_0000001178034116.png b/umn/source/_static/images/en-us_image_0000001178034116.png deleted file mode 100644 index e6a687b..0000000 Binary files a/umn/source/_static/images/en-us_image_0000001178034116.png and /dev/null differ diff --git a/umn/source/_static/images/en-us_image_0000001178352604.png b/umn/source/_static/images/en-us_image_0000001178352604.png deleted file mode 100644 index 5bfbc6c..0000000 Binary files a/umn/source/_static/images/en-us_image_0000001178352604.png and /dev/null differ diff --git a/umn/source/_static/images/en-us_image_0000001207036074.png b/umn/source/_static/images/en-us_image_0000001207036074.png new file mode 100644 index 0000000..33a41a2 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001207036074.png differ diff --git a/umn/source/_static/images/en-us_image_0000001217183707.png b/umn/source/_static/images/en-us_image_0000001217183707.png new file mode 100644 index 0000000..e8a2751 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001217183707.png differ diff --git a/umn/source/_static/images/en-us_image_0000001218074121.png b/umn/source/_static/images/en-us_image_0000001218074121.png new file mode 100644 index 0000000..97a22d8 Binary files /dev/null and 
b/umn/source/_static/images/en-us_image_0000001218074121.png differ diff --git a/umn/source/_static/images/en-us_image_0000001221501677.png b/umn/source/_static/images/en-us_image_0000001221501677.png new file mode 100644 index 0000000..f1403c1 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001221501677.png differ diff --git a/umn/source/_static/images/en-us_image_0000001221820189.png b/umn/source/_static/images/en-us_image_0000001221820189.png new file mode 100644 index 0000000..2af028b Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001221820189.png differ diff --git a/umn/source/_static/images/en-us_image_0000001223152417.png b/umn/source/_static/images/en-us_image_0000001223152417.png deleted file mode 100644 index 64a42b7..0000000 Binary files a/umn/source/_static/images/en-us_image_0000001223152417.png and /dev/null differ diff --git a/umn/source/_static/images/en-us_image_0000001223393893.png b/umn/source/_static/images/en-us_image_0000001223393893.png deleted file mode 100644 index 3f5e34e..0000000 Binary files a/umn/source/_static/images/en-us_image_0000001223393893.png and /dev/null differ diff --git a/umn/source/_static/images/en-us_image_0000001226818003.png b/umn/source/_static/images/en-us_image_0000001226818003.png new file mode 100644 index 0000000..c586bbf Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001226818003.png differ diff --git a/umn/source/_static/images/en-us_image_0000001238489436.png b/umn/source/_static/images/en-us_image_0000001238489436.png new file mode 100644 index 0000000..22b800a Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001238489436.png differ diff --git a/umn/source/_static/images/en-us_image_0000001238830246.png b/umn/source/_static/images/en-us_image_0000001238830246.png new file mode 100644 index 0000000..9d1f035 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001238830246.png differ diff --git a/umn/source/_static/images/en-us_image_0000001238903330.png b/umn/source/_static/images/en-us_image_0000001238903330.png new file mode 100644 index 0000000..82ffe7e Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001238903330.png differ diff --git a/umn/source/_static/images/en-us_image_0000001251716033.png b/umn/source/_static/images/en-us_image_0000001251716033.png new file mode 100644 index 0000000..92f9830 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001251716033.png differ diff --git a/umn/source/_static/images/en-us_image_0000001274543860.png b/umn/source/_static/images/en-us_image_0000001274543860.png new file mode 100644 index 0000000..43183cc Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001274543860.png differ diff --git a/umn/source/_static/images/en-us_image_0000001274544060.png b/umn/source/_static/images/en-us_image_0000001274544060.png new file mode 100644 index 0000000..3226bda Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001274544060.png differ diff --git a/umn/source/_static/images/en-us_image_0000001274864616.png b/umn/source/_static/images/en-us_image_0000001274864616.png new file mode 100644 index 0000000..1af07fd Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001274864616.png differ diff --git a/umn/source/_static/images/en-us_image_0000001274882416.png b/umn/source/_static/images/en-us_image_0000001274882416.png new file mode 100644 index 0000000..935bdf7 Binary files /dev/null and 
b/umn/source/_static/images/en-us_image_0000001274882416.png differ diff --git a/umn/source/_static/images/en-us_image_0000001283301301.png b/umn/source/_static/images/en-us_image_0000001283301301.png new file mode 100644 index 0000000..770a9fc Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001283301301.png differ diff --git a/umn/source/_static/images/en-us_image_0000001283343269.png b/umn/source/_static/images/en-us_image_0000001283343269.png new file mode 100644 index 0000000..4c17d7c Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001283343269.png differ diff --git a/umn/source/_static/images/en-us_image_0000001290111529.png b/umn/source/_static/images/en-us_image_0000001290111529.png new file mode 100644 index 0000000..226fcf7 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001290111529.png differ diff --git a/umn/source/_static/images/en-us_image_0000001291567729.png b/umn/source/_static/images/en-us_image_0000001291567729.png new file mode 100644 index 0000000..b48bf29 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001291567729.png differ diff --git a/umn/source/_static/images/en-us_image_0000001325377749.png b/umn/source/_static/images/en-us_image_0000001325377749.png new file mode 100644 index 0000000..6cab297 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001325377749.png differ diff --git a/umn/source/_static/images/en-us_image_0000001352539924.png b/umn/source/_static/images/en-us_image_0000001352539924.png new file mode 100644 index 0000000..5ae1c1d Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001352539924.png differ diff --git a/umn/source/_static/images/en-us_image_0000001360670117.png b/umn/source/_static/images/en-us_image_0000001360670117.png new file mode 100644 index 0000000..603c946 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001360670117.png differ diff --git a/umn/source/_static/images/en-us_image_0000001392259910.png b/umn/source/_static/images/en-us_image_0000001392259910.png new file mode 100644 index 0000000..1dbc838 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001392259910.png differ diff --git a/umn/source/_static/images/en-us_image_0000001392280374.png b/umn/source/_static/images/en-us_image_0000001392280374.png new file mode 100644 index 0000000..c569d0f Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001392280374.png differ diff --git a/umn/source/_static/images/en-us_image_0000001392318380.png b/umn/source/_static/images/en-us_image_0000001392318380.png new file mode 100644 index 0000000..8e6d050 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001392318380.png differ diff --git a/umn/source/_static/images/en-us_image_0000001402494682.png b/umn/source/_static/images/en-us_image_0000001402494682.png new file mode 100644 index 0000000..cf48188 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0000001402494682.png differ diff --git a/umn/source/_static/images/en-us_image_0000001223152421.png b/umn/source/_static/images/en-us_image_0261818822.png similarity index 100% rename from umn/source/_static/images/en-us_image_0000001223152421.png rename to umn/source/_static/images/en-us_image_0261818822.png diff --git a/umn/source/_static/images/en-us_image_0000001178034110.png b/umn/source/_static/images/en-us_image_0261818824.png similarity index 100% rename from 
umn/source/_static/images/en-us_image_0000001178034110.png rename to umn/source/_static/images/en-us_image_0261818824.png diff --git a/umn/source/_static/images/en-us_image_0261818867.png b/umn/source/_static/images/en-us_image_0261818867.png new file mode 100644 index 0000000..99f7849 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0261818867.png differ diff --git a/umn/source/_static/images/en-us_image_0000001178034108.png b/umn/source/_static/images/en-us_image_0261818875.png similarity index 100% rename from umn/source/_static/images/en-us_image_0000001178034108.png rename to umn/source/_static/images/en-us_image_0261818875.png diff --git a/umn/source/_static/images/en-us_image_0000001178192670.png b/umn/source/_static/images/en-us_image_0261818885.png similarity index 100% rename from umn/source/_static/images/en-us_image_0000001178192670.png rename to umn/source/_static/images/en-us_image_0261818885.png diff --git a/umn/source/_static/images/en-us_image_0000001223393899.png b/umn/source/_static/images/en-us_image_0261818886.png similarity index 100% rename from umn/source/_static/images/en-us_image_0000001223393899.png rename to umn/source/_static/images/en-us_image_0261818886.png diff --git a/umn/source/_static/images/en-us_image_0261820020.png b/umn/source/_static/images/en-us_image_0261820020.png new file mode 100644 index 0000000..b063957 Binary files /dev/null and b/umn/source/_static/images/en-us_image_0261820020.png differ diff --git a/umn/source/_static/images/en-us_image_0268523694.png b/umn/source/_static/images/en-us_image_0268523694.png new file mode 100644 index 0000000..718c8fa Binary files /dev/null and b/umn/source/_static/images/en-us_image_0268523694.png differ diff --git a/umn/source/_static/images/en-us_image_0278498565.png b/umn/source/_static/images/en-us_image_0278498565.png new file mode 100644 index 0000000..2756b2f Binary files /dev/null and b/umn/source/_static/images/en-us_image_0278498565.png differ diff --git a/umn/source/add-ons/autoscaler.rst b/umn/source/add-ons/autoscaler.rst index c22caea..52a845f 100644 --- a/umn/source/add-ons/autoscaler.rst +++ b/umn/source/add-ons/autoscaler.rst @@ -49,6 +49,7 @@ Notes and Constraints - Only clusters of v1.9.10-r2 and later support autoscaler. - Ensure that there are sufficient resources for installing the add-on. +- The default node pool does not support auto scaling. For details, see :ref:`Description of DefaultPool `. .. _cce_01_0154__section15573161754711: diff --git a/umn/source/add-ons/everest_system_resource_add-on_mandatory.rst b/umn/source/add-ons/everest_system_resource_add-on_mandatory.rst index 11cf87a..e1217f2 100644 --- a/umn/source/add-ons/everest_system_resource_add-on_mandatory.rst +++ b/umn/source/add-ons/everest_system_resource_add-on_mandatory.rst @@ -15,7 +15,7 @@ Everest is a cloud-native container storage system. Based on Container Storage I Notes and Constraints --------------------- -- If your cluster is upgraded from v1.13 to v1.15, :ref:`storage-driver ` is replaced by everest (v1.1.6 or later) for container storage. The takeover does not affect the original storage functions. For details about CSI and FlexVolume, see :ref:`Differences Between CSI and FlexVolume Plug-ins `. +- If your cluster is upgraded from v1.13 to v1.15, :ref:`storage-driver ` is replaced by everest (v1.1.6 or later) for container storage. The takeover does not affect the original storage functions. For details about CSI and FlexVolume, see :ref:`Differences Between CSI and FlexVolume Plug-ins `. 
- In version 1.2.0 of the everest add-on, **key authentication** is optimized when OBS is used. After the everest add-on is upgraded from a version earlier than 1.2.0, you need to restart all workloads that use OBS in the cluster. Otherwise, workloads may not be able to use OBS. - By default, this add-on is installed in **clusters of v1.15 and later**. For clusters of v1.13 and earlier, the :ref:`storage-driver ` add-on is installed by default. diff --git a/umn/source/add-ons/index.rst b/umn/source/add-ons/index.rst index ef6de48..438cd3b 100644 --- a/umn/source/add-ons/index.rst +++ b/umn/source/add-ons/index.rst @@ -12,6 +12,7 @@ Add-ons - :ref:`autoscaler ` - :ref:`metrics-server ` - :ref:`gpu-beta ` +- :ref:`volcano ` .. toctree:: :maxdepth: 1 @@ -24,3 +25,4 @@ Add-ons autoscaler metrics-server gpu-beta + volcano diff --git a/umn/source/add-ons/storage-driver_system_resource_add-on_mandatory.rst b/umn/source/add-ons/storage-driver_system_resource_add-on_mandatory.rst index d21b65b..9e1155a 100644 --- a/umn/source/add-ons/storage-driver_system_resource_add-on_mandatory.rst +++ b/umn/source/add-ons/storage-driver_system_resource_add-on_mandatory.rst @@ -15,7 +15,7 @@ storage-driver functions as a standard Kubernetes FlexVolume plug-in to allow co Notes and Constraints --------------------- -- For clusters created in CCE, Kubernetes v1.15.11 is a transitional version in which the FlexVolume plug-in (storage-driver) is compatible with the CSI plug-in (:ref:`everest `). Clusters of v1.17 and later versions do not support FlexVolume any more. You need to use the everest add-on. For details about CSI and FlexVolume, see :ref:`Differences Between CSI and FlexVolume Plug-ins `. +- For clusters created in CCE, Kubernetes v1.15.11 is a transitional version in which the FlexVolume plug-in (storage-driver) is compatible with the CSI plug-in (:ref:`everest `). Clusters of v1.17 and later versions do not support FlexVolume any more. You need to use the everest add-on. For details about CSI and FlexVolume, see :ref:`Differences Between CSI and FlexVolume Plug-ins `. - The FlexVolume plug-in will be maintained by Kubernetes developers, but new functionality will only be added to CSI. You are advised not to create storage that connects to the FlexVolume plug-in (storage-driver) in CCE any more. Otherwise, the storage resources may not function normally. - This add-on can be installed only in **clusters of v1.13 or earlier**. By default, the :ref:`everest ` add-on is installed when clusters of v1.15 or later are created. diff --git a/umn/source/add-ons/volcano.rst b/umn/source/add-ons/volcano.rst new file mode 100644 index 0000000..939a8bc --- /dev/null +++ b/umn/source/add-ons/volcano.rst @@ -0,0 +1,51 @@ +:original_name: cce_01_0193.html + +.. _cce_01_0193: + +volcano +======= + +Introduction +------------ + +Volcano is a batch processing platform based on Kubernetes. It provides a series of features required by machine learning, deep learning, bioinformatics, genomics, and other big data applications, as a powerful supplement to Kubernetes capabilities. + +Volcano provides general-purpose, high-performance computing capabilities, such as job scheduling engine, heterogeneous chip management, and job running management, serving end users through computing frameworks for different industries, such as AI, big data, gene sequencing, and rendering. (Volcano has been open-sourced in GitHub.) + +Volcano provides job scheduling, job management, and queue management for computing applications. 
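For example, once the add-on is installed, a batch workload can be submitted as a Volcano Job so that it is scheduled by the Volcano scheduler instead of the default Kubernetes scheduler. The following manifest is a minimal sketch only and is not taken from this document; the job name, queue, image, replica counts, and resource requests are illustrative.

.. code-block::

   apiVersion: batch.volcano.sh/v1alpha1    # Volcano Job CRD installed with the add-on
   kind: Job
   metadata:
     name: example-vcjob                    # illustrative name
   spec:
     schedulerName: volcano                 # hand the job to the Volcano scheduler
     minAvailable: 2                        # gang scheduling: run only when 2 pods can be placed
     queue: default                         # Volcano queue used by this job
     tasks:
       - replicas: 2
         name: worker
         template:                          # standard pod template
           spec:
             restartPolicy: OnFailure
             containers:
               - name: worker
                 image: nginx:latest        # placeholder image
                 resources:
                   requests:
                     cpu: 500m
                     memory: 512Mi

You can submit such a job with **kubectl apply -f** and query it with **kubectl get jobs.batch.volcano.sh**.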
Its main features are as follows: + +- Diverse computing frameworks, such as TensorFlow, MPI, and Spark, can run on Kubernetes in containers. Common APIs for batch computing jobs through CRD, various plug-ins, and advanced job lifecycle management are provided. +- Advanced scheduling capabilities are provided for batch computing and high-performance computing scenarios, including group scheduling, preemptive priority scheduling, packing, resource reservation, and task topology. +- Queues can be effectively managed for scheduling jobs. Complex job scheduling capabilities such as queue priority and multi-level queues are supported. + +Open source community: https://github.com/volcano-sh/volcano + +Installing the Add-on +--------------------- + +#. Log in to the CCE console. In the navigation pane, choose **Add-ons**. On the **Add-on Marketplace** tab page, click **Install Add-on** under **volcano**. + +#. On the **Install Add-on** page, select the cluster and the add-on version, and click **Next: Configuration**. + +#. Click **Install** to directly install the add-on. Currently, the volcano add-on has no configurable parameters. + + After the add-on is installed, click **Go Back to Previous Page**. On the **Add-on Instance** tab page, select the corresponding cluster to view the running instance. This indicates that the add-on has been installed on each node in the cluster. + +Upgrading the Add-on +-------------------- + +#. Log in to the CCE console. In the navigation pane, choose **Add-ons**. On the **Add-on Instance** tab page, click **Upgrade** under **volcano**. + + .. note:: + + - If the **Upgrade** button is not available, the current add-on is already up-to-date and no upgrade is required. + - During the upgrade, the volcano add-on of the original version on cluster nodes will be discarded, and the add-on of the target version will be installed. + +#. On the **Basic Information** page, select the add-on version and click **Next**. +#. Click **Upgrade**. + +Uninstalling the Add-on +----------------------- + +#. Log in to the CCE console. In the navigation pane, choose **Add-ons**. On the **Add-on Instance** tab page, click **Uninstall** under **volcano**. +#. In the dialog box displayed, click **Yes** to uninstall the add-on. diff --git a/umn/source/best_practice/auto_scaling/index.rst b/umn/source/best_practice/auto_scaling/index.rst new file mode 100644 index 0000000..eff15db --- /dev/null +++ b/umn/source/best_practice/auto_scaling/index.rst @@ -0,0 +1,14 @@ +:original_name: cce_bestpractice_0090.html + +.. _cce_bestpractice_0090: + +Auto Scaling +============ + +- :ref:`Using HPA and CA for Auto Scaling of Workloads and Nodes ` + +.. toctree:: + :maxdepth: 1 + :hidden: + + using_hpa_and_ca_for_auto_scaling_of_workloads_and_nodes diff --git a/umn/source/best_practice/auto_scaling/using_hpa_and_ca_for_auto_scaling_of_workloads_and_nodes.rst b/umn/source/best_practice/auto_scaling/using_hpa_and_ca_for_auto_scaling_of_workloads_and_nodes.rst new file mode 100644 index 0000000..198412e --- /dev/null +++ b/umn/source/best_practice/auto_scaling/using_hpa_and_ca_for_auto_scaling_of_workloads_and_nodes.rst @@ -0,0 +1,385 @@ +:original_name: cce_bestpractice_00282.html + +.. 
_cce_bestpractice_00282: + +Using HPA and CA for Auto Scaling of Workloads and Nodes +======================================================== + +Scenario +-------- + +The best way to handle surging traffic is to automatically adjust the number of machines based on the traffic volume or resource usage, which is called scaling. + +In CCE, the resources that can be used by containers are fixed during application deployment. Therefore, in auto scaling, pods are scaled first. The node resource usage increases only after the number of pods increases. Then, nodes can be scaled based on the node resource usage. How to configure auto scaling in CCE? + +Solution +-------- + +Two major auto scaling policies are HPA (Horizontal Pod Autoscaling) and CA (Cluster AutoScaling). HPA is for workload auto scaling and CA is for node auto scaling. + +HPA and CA work with each other. HPA requires sufficient cluster resources for successful scaling. When the cluster resources are insufficient, CA is needed to add nodes. If HPA reduces workloads, the cluster will have a large number of idle resources. In this case, CA needs to release nodes to avoid resource waste. + +As shown in :ref:`Figure 1 `, HPA performs scale-out based on the monitoring metrics. When cluster resources are insufficient, newly created pods are in Pending state. CA then checks these pending pods and selects the most appropriate node pool based on the configured scaling policy to scale out the node pool. + +.. _cce_bestpractice_00282__fig6540132372015: + +.. figure:: /_static/images/en-us_image_0000001290111529.png + :alt: **Figure 1** HPA and CA working flows + + **Figure 1** HPA and CA working flows + +Using HPA and CA can easily implement auto scaling in most scenarios. In addition, the scaling process of nodes and pods can be easily observed. + +This section uses an example to describe the auto scaling process using HPA and CA policies together. + +Preparations +------------ + +#. Create a cluster with one node. The node should have 2 cores of CPU and 4 GB of memory, or a higher specification, as well as an EIP to allow external access. If no EIP is bound to the node during node creation, you can manually bind one on the ECS console after creating the node. +#. Install add-ons for the cluster. + + - autoscaler: node scaling add-on + - metrics-server: an aggregator of resource usage data in a Kubernetes cluster. It can collect measurement data of major Kubernetes resources, such as pods, nodes, containers, and Services. + +#. Log in to the cluster node and run a computing-intensive application. When a user sends a request, the result needs to be calculated before being returned to the user. + + a. Create a PHP file named **index.php**, which calculates the square root 1,000,000 times for each request before returning **OK!**. + + .. code-block:: + + vi index.php + + Example file content: + + .. code-block:: + + <?php + $x = 0.0001; + for ($i = 0; $i <= 1000000; $i++) { + $x += sqrt($x); + } + echo "OK!"; + ?> + + b. Compile a Dockerfile to build an image. + + .. code-block:: + + vi Dockerfile + + Example Dockerfile: + + .. code-block:: + + FROM php:5-apache + COPY index.php /var/www/html/index.php + RUN chmod a+rx index.php + + c. Run the following command to build an image named **hpa-example** with the tag **latest**. + + .. code-block:: + + docker build -t hpa-example:latest . + + d. .. _cce_bestpractice_00282__li108181514125: + + (Optional) Log in to the SWR console, choose **Organization Management** in the navigation pane, and click **Create Organization** in the upper right corner to create an organization.
+ + Skip this step if you already have an organization. + + e. .. _cce_bestpractice_00282__li187221141362: + + In the navigation pane, choose **My Images** and then click **Upload Through Client**. On the page displayed, click **Generate a temporary login command** and click |image1| to copy the command. + + f. Run the login command copied in the previous step on the cluster node. If the login is successful, the message "Login Succeeded" is displayed. + + g. Tag the hpa-example image. + + **docker tag** **[Image name 1:Tag 1]** **[Image repository address]/[Organization name]/[Image name 2:Tag 2]** + + - **[Image name 1:Tag 1]**: name and tag of the local image to be uploaded. + - **[Image repository address]**: The domain name at the end of the login command in :ref:`5 ` is the image repository address, which can be obtained on the SWR console. + - **[Organization name]**: name of the organization created in :ref:`4 `. + - **[Image name 2:Tag 2]**: desired image name and tag to be displayed on the SWR console. + + Example: + + **docker tag hpa-example:latest swr.eu-de.otc.t-systems.com/group/hpa-example:latest** + + h. Push the image to the image repository. + + **docker push** **[Image repository address]/[Organization name]/[Image name 2:Tag 2]** + + Example: + + **docker push swr.eu-de.otc.t-systems.com/group/hpa-example:latest** + + The following information will be returned upon a successful push: + + .. code-block:: + + 6d6b9812c8ae: Pushed + ... + fe4c16cbf7a4: Pushed + latest: digest: sha256:eb7e3bbd*** size: ** + + To view the pushed image, go to the SWR console and refresh the **My Images** page. + +Creating a Node Pool and a Node Scaling Policy +---------------------------------------------- + +#. Log in to the CCE console, access the created cluster, click **Nodes** on the left, click the **Node Pools** tab, and click **Create Node Pool** in the upper right corner. + +#. Set node pool parameters, add a node with 2 vCPUs and 4 GB memory, and enable auto scaling. + + - **Nodes**: Set it to **1**, indicating that one node is created by default when a node pool is created. + - Auto Scaling: Enable the option, meaning that nodes will be automatically created or deleted in the node pool based on the cluster loads. + - **Max. Nodes**: Set it to **5**, indicating the maximum number of nodes in a node pool. + - **Specifications**: 2 vCPUs \| 4 GiB + + Retain the defaults for other parameters. For details, see `Creating a Node Pool `__. + +#. Click **Add-ons** on the left of the cluster console, click **Edit** under the autoscaler add-on, modify the add-on configuration, enable **Auto node scale-in**, and configure scale-in parameters. For example, trigger scale-in when the node resource utilization is less than 50%. + + |image2| + + After the preceding configurations, scale-out is performed based on the pending status of the pod and scale-in is triggered when the node resource utilization decreases. + +#. Click **Node Scaling** on the left of the cluster console and click **Create Node Scaling Policy** in the upper right corner. Node scaling policies added here trigger scale-out based on the CPU/memory allocation rate or periodically. + + As shown in the following figure, when the cluster CPU allocation rate is greater than 70%, one node will be added. A node scaling policy needs to be associated with a node pool. Multiple node pools can be associated. 
When you need to scale nodes, node with proper specifications will be added or reduced from the node pool based on the minimum waste principle. For details, see `Creating a Node Scaling Policy `__. + + |image3| + +Creating a Workload +------------------- + +Use the hpa-example image to create a Deployment with one replica. The image path is related to the organization uploaded to the SWR repository and needs to be replaced with the actual value. + +.. code-block:: + + kind: Deployment + apiVersion: apps/v1 + metadata: + name: hpa-example + spec: + replicas: 1 + selector: + matchLabels: + app: hpa-example + template: + metadata: + labels: + app: hpa-example + spec: + containers: + - name: container-1 + image: 'hpa-example:latest ' # Replace it with the address of the image you uploaded to SWR. + resources: + limits: # The value of limits must be the same as that of requests to prevent flapping during scaling. + cpu: 500m + memory: 200Mi + requests: + cpu: 500m + memory: 200Mi + imagePullSecrets: + - name: default-secret + +Then, create a NodePort Service for the workload so that the workload can be accessed from external networks. + +.. code-block:: + + kind: Service + apiVersion: v1 + metadata: + name: hpa-example + spec: + ports: + - name: cce-service-0 + protocol: TCP + port: 80 + targetPort: 80 + nodePort: 31144 + selector: + app: hpa-example + type: NodePort + +Creating an HPA Policy +---------------------- + +Create an HPA policy. As shown below, the policy is associated with the hpa-example workload, and the target CPU usage is 50%. + +There are two other annotations. One annotation defines the CPU thresholds, indicating that scaling is not performed when the CPU usage is between 30% and 70% to prevent impact caused by slight fluctuation. The other is the scaling time window, indicating that after the policy is successfully executed, a scaling operation will not be triggered again in this cooling interval to prevent impact caused by short-term fluctuation. + +.. code-block:: + + apiVersion: autoscaling/v2 + kind: HorizontalPodAutoscaler + metadata: + name: hpa-policy + annotations: + extendedhpa.metrics: '[{"type":"Resource","name":"cpu","targetType":"Utilization","targetRange":{"low":"30","high":"70"}}]' + extendedhpa.option: '{"downscaleWindow":"5m","upscaleWindow":"3m"}' + spec: + scaleTargetRef: + kind: Deployment + name: hpa-example + apiVersion: apps/v1 + minReplicas: 1 + maxReplicas: 100 + metrics: + - type: Resource + resource: + name: cpu + targetAverageUtilization: 50 + +Set the parameters as follows if you are using the console. + +|image4| + +Observing the Auto Scaling Process +---------------------------------- + +#. Check the cluster node status. In the following example, there are two nodes. + + .. code-block:: + + # kubectl get node + NAME STATUS ROLES AGE VERSION + 192.168.0.183 Ready 2m20s v1.17.9-r0-CCE21.1.1.3.B001-17.36.8 + 192.168.0.26 Ready 55m v1.17.9-r0-CCE21.1.1.3.B001-17.36.8 + + Check the HPA policy. The CPU usage of the target workload is 0%. + + .. code-block:: + + # kubectl get hpa hpa-policy + NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE + hpa-policy Deployment/hpa-example 0%/50% 1 100 1 4m + +#. Run the following command to access the workload. In the following command, {ip:port} indicates the access address of the workload, which can be queried on the workload details page. + + **while true;do wget -q -O- http://**\ *{ip:port}*\ **; done** + + .. note:: + + If no EIP is displayed, the cluster node has not been assigned any EIP. 
You need to create one, bind it to the node, and synchronize node data. . + + Observe the scaling process of the workload. + + .. code-block:: + + # kubectl get hpa hpa-policy --watch + NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE + hpa-policy Deployment/hpa-example 0%/50% 1 100 1 4m + hpa-policy Deployment/hpa-example 190%/50% 1 100 1 4m23s + hpa-policy Deployment/hpa-example 190%/50% 1 100 4 4m31s + hpa-policy Deployment/hpa-example 200%/50% 1 100 4 5m16s + hpa-policy Deployment/hpa-example 200%/50% 1 100 4 6m16s + hpa-policy Deployment/hpa-example 85%/50% 1 100 4 7m16s + hpa-policy Deployment/hpa-example 81%/50% 1 100 4 8m16s + hpa-policy Deployment/hpa-example 81%/50% 1 100 7 8m31s + hpa-policy Deployment/hpa-example 57%/50% 1 100 7 9m16s + hpa-policy Deployment/hpa-example 51%/50% 1 100 7 10m + hpa-policy Deployment/hpa-example 58%/50% 1 100 7 11m + + You can see that the CPU usage of the workload is 190% at 4m23s, which exceeds the target value. In this case, scaling is triggered to expand the workload to four replicas/pods. In the subsequent several minutes, the CPU usage does not decrease until 7m16s. This is because the new pods may not be successfully created. The possible cause is that resources are insufficient and the pods are in Pending state. During this period, nodes are added. + + At 7m16s, the CPU usage decreases, indicating that the pods are successfully created and start to bear traffic. The CPU usage decreases to 81% at 8m, still greater than the target value (50%) and the high threshold (70%). Therefore, 7 pods are added at 9m16s, and the CPU usage decreases to 51%, which is within the range of 30% to 70%. From then on, the number of pods remains 7. + + In the following output, you can see the workload scaling process and the time when the HPA policy takes effect. + + .. code-block:: + + # kubectl describe deploy hpa-example + ... + Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ScalingReplicaSet 25m deployment-controller Scaled up replica set hpa-example-79dd795485 to 1 + Normal ScalingReplicaSet 20m deployment-controller Scaled up replica set hpa-example-79dd795485 to 4 + Normal ScalingReplicaSet 16m deployment-controller Scaled up replica set hpa-example-79dd795485 to 7 + # kubectl describe hpa hpa-policy + ... + Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal SuccessfulRescale 20m horizontal-pod-autoscaler New size: 4; reason: cpu resource utilization (percentage of request) above target + Normal SuccessfulRescale 16m horizontal-pod-autoscaler New size: 7; reason: cpu resource utilization (percentage of request) above target + + Check the number of nodes. The following output shows that two nodes are added. + + .. code-block:: + + # kubectl get node + NAME STATUS ROLES AGE VERSION + 192.168.0.120 Ready 3m5s v1.17.9-r0-CCE21.1.1.3.B001-17.36.8 + 192.168.0.136 Ready 6m58s v1.17.9-r0-CCE21.1.1.3.B001-17.36.8 + 192.168.0.183 Ready 18m v1.17.9-r0-CCE21.1.1.3.B001-17.36.8 + 192.168.0.26 Ready 71m v1.17.9-r0-CCE21.1.1.3.B001-17.36.8 + + You can also view the scaling history on the console. For example, the CA policy is executed once when the CPU allocation rate in the cluster is greater than 70%, and the number of nodes in the node pool is increased from 2 to 3. The new node is automatically added by autoscaler based on the pending state of pods in the initial phase of HPA. + + The node scaling process is as follows: + + a. 
After the number of pods changes to 4, the pods are in Pending state due to insufficient resources. As a result, the default scale-out policy of the autoscaler add-on is triggered, and the number of nodes is increased by one. + b. The second node scale-out is triggered because the CPU allocation rate in the cluster is greater than 70%. As a result, the number of nodes is increased by one, which is recorded in the scaling history on the console. Scaling based on the allocation rate ensures that the cluster has sufficient resources. + +#. Stop accessing the workload and check the number of pods. + + .. code-block:: + + # kubectl get hpa hpa-policy --watch + NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE + hpa-policy Deployment/hpa-example 50%/50% 1 100 7 12m + hpa-policy Deployment/hpa-example 21%/50% 1 100 7 13m + hpa-policy Deployment/hpa-example 0%/50% 1 100 7 14m + hpa-policy Deployment/hpa-example 0%/50% 1 100 7 18m + hpa-policy Deployment/hpa-example 0%/50% 1 100 3 18m + hpa-policy Deployment/hpa-example 0%/50% 1 100 3 19m + hpa-policy Deployment/hpa-example 0%/50% 1 100 3 19m + hpa-policy Deployment/hpa-example 0%/50% 1 100 3 19m + hpa-policy Deployment/hpa-example 0%/50% 1 100 3 19m + hpa-policy Deployment/hpa-example 0%/50% 1 100 3 23m + hpa-policy Deployment/hpa-example 0%/50% 1 100 3 23m + hpa-policy Deployment/hpa-example 0%/50% 1 100 1 23m + + You can see that the CPU usage is 21% at 13m. The number of pods is reduced to 3 at 18m, and then reduced to 1 at 23m. + + In the following output, you can see the workload scaling process and the time when the HPA policy takes effect. + + .. code-block:: + + # kubectl describe deploy hpa-example + ... + Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ScalingReplicaSet 25m deployment-controller Scaled up replica set hpa-example-79dd795485 to 1 + Normal ScalingReplicaSet 20m deployment-controller Scaled up replica set hpa-example-79dd795485 to 4 + Normal ScalingReplicaSet 16m deployment-controller Scaled up replica set hpa-example-79dd795485 to 7 + Normal ScalingReplicaSet 6m28s deployment-controller Scaled down replica set hpa-example-79dd795485 to 3 + Normal ScalingReplicaSet 72s deployment-controller Scaled down replica set hpa-example-79dd795485 to 1 + # kubectl describe hpa hpa-policy + ... + Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal SuccessfulRescale 20m horizontal-pod-autoscaler New size: 4; reason: cpu resource utilization (percentage of request) above target + Normal SuccessfulRescale 16m horizontal-pod-autoscaler New size: 7; reason: cpu resource utilization (percentage of request) above target + Normal SuccessfulRescale 6m45s horizontal-pod-autoscaler New size: 3; reason: All metrics below target + Normal SuccessfulRescale 90s horizontal-pod-autoscaler New size: 1; reason: All metrics below target + + You can also view the HPA policy execution history on the console. Wait until the one node is reduced. + + The reason why the other two nodes in the node pool are not reduced is that they both have pods in the kube-system namespace (and these pods are not created by DaemonSets). For details about node scale-in, see `Node Scaling Mechanisms `__. + +Summary +------- + +Using HPA and CA can easily implement auto scaling in most scenarios. In addition, the scaling process of nodes and pods can be easily observed. + +.. |image1| image:: /_static/images/en-us_image_0000001360670117.png +.. |image2| image:: /_static/images/en-us_image_0000001274543860.png +.. 
|image3| image:: /_static/images/en-us_image_0000001274544060.png +.. |image4| image:: /_static/images/en-us_image_0000001274864616.png diff --git a/umn/source/reference/checklist_for_migrating_containerized_applications_to_the_cloud.rst b/umn/source/best_practice/checklist_for_deploying_containerized_applications_in_the_cloud.rst similarity index 55% rename from umn/source/reference/checklist_for_migrating_containerized_applications_to_the_cloud.rst rename to umn/source/best_practice/checklist_for_deploying_containerized_applications_in_the_cloud.rst index 36fa0f0..45ac85f 100644 --- a/umn/source/reference/checklist_for_migrating_containerized_applications_to_the_cloud.rst +++ b/umn/source/best_practice/checklist_for_deploying_containerized_applications_in_the_cloud.rst @@ -1,66 +1,66 @@ -:original_name: cce_faq_00006.html +:original_name: cce_bestpractice_00006.html -.. _cce_faq_00006: +.. _cce_bestpractice_00006: -Checklist for Migrating Containerized Applications to the Cloud +Checklist for Deploying Containerized Applications in the Cloud =============================================================== Overview -------- -Cloud Container Engine (CCE) provides highly scalable, high-performance, enterprise-class Kubernetes clusters and supports Docker containers. With CCE, you can easily deploy, manage, and scale out containerized applications. - -This checklist describes the system availability, data reliability, and O&M reliability of migrating containerized applications to the cloud. It contains check items, impact, FAQs, and examples, helping you migrate services to CCE and avoid application exceptions or cluster reconstruction caused by improper use. +Security, efficiency, stability, and availability are common requirements on all cloud services. To meet these requirements, the system availability, data reliability, and O&M stability must be perfectly coordinated. This checklist describes the check items for deploying containerized applications on the cloud to help you efficiently migrate services to CCE, reducing potential cluster or application exceptions caused by improper use. Check Items ----------- .. table:: **Table 1** System availability - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Category | Check Item | Type | Impact | - +==========+==============================================================================================================================================================================================================================+==================+=========================================================================================================================================================================================================================================================================================================+ - | Cluster | When creating a cluster, set **High Availability** to **Yes**. | Reliability | A cluster with **High Availability** set to **No** is a non-HA cluster with only one master. 
If the master node is faulty, the entire cluster will be unavailable. Therefore, you are advised to create an HA cluster in the production environment. | - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | Before creating a cluster, determine the container network model that is suitable to the service scenario. | Network planning | Different container network models apply to different scenarios. | - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | Before creating a cluster, plan the subnet CIDR block and container network CIDR block properly. | Network planning | If the range of the subnet and container network CIDR blocks is not properly set, the number of available nodes in the cluster will be less than the number of nodes supported by the cluster. Network planning has different constraints on different container network models. | - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | Before creating a cluster, properly plan CIDR blocks for the related Direct Connect, peering connection, container network, service network, and subnet to avoid IP address conflicts. | Network planning | If CIDR blocks are not properly set and IP address conflicts occur, service access will be affected. | - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Workload | When creating a workload, set the upper and lower limits of CPU and memory resources. 
| Deployment | If the upper and lower resource limits are not set for an application, a resource leak of this application will make resources unavailable for other applications deployed on the same node. In addition, applications that do not have upper and lower resource limits cannot be accurately monitored. | - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | When creating an application, set the number of pods to more than two and set the scheduling policy based on service requirements. | Reliability | A single-pod application will be faulty if the node or pod is faulty. | - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | Properly set affinity and anti-affinity. | Reliability | If affinity and anti-affinity are both configured for an application that provides Services externally, Services may fail to be accessed after the application is upgraded or restarted. | - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | When creating a workload, set the health check policy, that is, set the workload liveness probe and the readiness probe. | Reliability | If the two probes are not set, pods cannot detect service exceptions or automatically restart the service to restore it. This results in a situation where the pod status is normal but the service in the pod is abnormal. 
| - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | When creating a workload, set the pre-stop processing command (**Lifecycle** > **Pre-Stop**) to ensure that the services running in the pods can be completed in advance in the case of application upgrade or pod deletion. | Reliability | If the pre-stop processing command is not configured, the pod will be directly killed and services will be interrupted during application upgrade. | - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | When creating a Service, select an access mode based on service requirements. Currently, the following types of access modes are supported: intra-cluster access, intra-VPC access, and external access. | Deployment | If the access mode is not properly set, internal and external access may be in disorder and resources may be wasted. 
| - +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Category | Check Item | Type | Impact | + +==========+==============================================================================================================================================================================================================================+==================+============================================================================================================================================================================================================================================================================================+ + | Cluster | Before creating a cluster, properly plan the node network and container network based on service requirements to allow subsequent service expansion. | Network planning | If the subnet or container CIDR block where the cluster resides is small, the number of available nodes supported by the cluster may be less than required. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | Before creating a cluster, properly plan CIDR blocks for the related Direct Connect, peering connection, container network, service network, and subnet to avoid IP address conflicts. | Network planning | If CIDR blocks are not properly set and IP address conflicts occur, service access will be affected. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | When a cluster is created, the default security group is automatically created and bound to the cluster. 
You can set custom security group rules based on service requirements. | Deployment | Security groups are key to security isolation. Improper security policy configuration may cause security risks and service connectivity problems. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | Enable the multi-master node mode, and set the number of master nodes to **3** when creating a cluster. | Reliability | After the multi-master node mode is enabled, three master nodes will be created. If a master node is faulty, the cluster can still be available without affecting service functions. In commercial scenarios, it is advised to enable the multi-master node mode. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | When creating a cluster, select a proper network model, such as container tunnel network or VPC network. | Deployment | After a cluster is created, the network model cannot be changed. Exercise caution when selecting a network model. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Workload | When creating a workload, you need to set the CPU and memory limits to improve service robustness. | Deployment | When multiple applications are deployed on the same node, if the upper and lower resource limits are not set for an application, resource leakage occurs. As a result, resources cannot be allocated to other applications, and the application monitoring information will be inaccurate. 
| + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | When creating a workload, you can set probes for container health check, including **liveness probe** and **readiness probe**. | Reliability | If the health check function is not configured, a pod cannot detect service exceptions or automatically restart the service to restore it. This results in a situation where the pod status is normal but the service in the pod is abnormal. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | When creating a workload, select a proper access mode (Service). Currently, the following types of Services are supported: ClusterIP, NodePort, and LoadBalancer. | Deployment | Improper Service configuration may cause logic confusion for internal and external access and resource waste. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | When creating a workload, do not set the number of replicas for a single pod. Set a proper node scheduling policy based on your service requirements. | Reliability | For example, if the number of replicas of a single pod is set, the service will be abnormal when the node or pod is abnormal. To ensure that your pods can be successfully scheduled, ensure that the node has idle resources for container scheduling after you set the scheduling rule. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | Properly set affinity and anti-affinity. 
| Reliability | If affinity and anti-affinity are both configured for an application that provides Services externally, Services may fail to be accessed after the application is upgraded or restarted. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | When creating a workload, set the pre-stop processing command (**Lifecycle** > **Pre-Stop**) to ensure that the services running in the pods can be completed in advance in the case of application upgrade or pod deletion. | Reliability | If the pre-stop processing command is not configured, the pod will be directly killed and services will be interrupted during application upgrade. | + +----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ .. table:: **Table 2** Data reliability - +----------------------------+-------------------------------------------------------------------+-------------+----------------------------------------------------------------------------------------+ - | Category | Check Item | Type | Impact | - +============================+===================================================================+=============+========================================================================================+ - | Container data persistency | Store application data in the cloud, rather than on a local disk. | Reliability | If a node is faulty and cannot be restored, data on the local disk cannot be restored. | - +----------------------------+-------------------------------------------------------------------+-------------+----------------------------------------------------------------------------------------+ - | Backup | Back up application data. | Reliability | Data cannot be restored after being lost. 
| - +----------------------------+-------------------------------------------------------------------+-------------+----------------------------------------------------------------------------------------+ + +----------------------------+-----------------------------------------------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Category | Check Item | Type | Impact | + +============================+=================================================================+=============+================================================================================================================================================================================+ + | Container data persistency | Select a proper data volume type based on service requirements. | Reliability | When a node is faulty and cannot be recovered, data in the local disk cannot be recovered. Therefore, you are advised to use cloud storage volumes to ensure data reliability. | + +----------------------------+-----------------------------------------------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Backup | Back up application data. | Reliability | Data cannot be restored after being lost. | + +----------------------------+-----------------------------------------------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ .. table:: **Table 3** O&M reliability - +---------------+------------------------------------------------------------------------------------------------------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Category | Check Item | Type | Impact | - +===============+================================================================================================================================================+============+====================================================================================================================================================================================================+ - | Project | The quotas of ECS, VPC, subnet, EIP, and EVS resources must meet customer requirements. | Deployment | If the quota is insufficient, resources will fail to be created. Specifically, users who have configured automatic capacity expansion must have sufficient resource quotas. | - +---------------+------------------------------------------------------------------------------------------------------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | Do not install private software or modify OS configurations on a cluster node. 
| Deployment | If private software is installed on a cluster node or OS configurations are modified, exceptions may occur on Kubernetes components on the node, making it unavailable for application deployment. | - +---------------+------------------------------------------------------------------------------------------------------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | Do not modify information about resources created by CCE, such as security groups and EVS disks. Resources created by CCE are labeled **cce**. | Deployment | CCE cluster functions may be abnormal. | - +---------------+------------------------------------------------------------------------------------------------------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Proactive O&M | Configure alarm monitoring on AOM for the applications you deployed in CCE clusters. | Monitoring | If alarm monitoring is not configured, you cannot receive alarms when applications are faulty and need to manually locate the faults. | - +---------------+------------------------------------------------------------------------------------------------------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Category | Check Item | Type | Impact | + +===============+=====================================================================================================================================================================================================================================================================================+============+========================================================================================================================================================================================================+ + | Project | The quotas of ECS, VPC, subnet, EIP, and EVS resources must meet customer requirements. | Deployment | If the quota is insufficient, resources will fail to be created. Specifically, users who have configured auto scaling must have sufficient resource quotas. 
| + +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | You are not advised to modify kernel parameters, system configurations, cluster core component versions, security groups, and ELB-related parameters on cluster nodes, or install software that has not been verified. | Deployment | Exceptions may occur on CCE clusters or Kubernetes components on the node, making the node unavailable for application deployment. | + +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | Do not modify information about resources created by CCE, such as security groups and EVS disks. Resources created by CCE are labeled **cce**. | Deployment | CCE cluster functions may be abnormal. | + +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Proactive O&M | CCE provides multi-dimensional monitoring and alarm reporting functions, and supports basic resource monitoring based on fine-grained metrics by interconnecting with Application Operations Management (AOM). Alarms allow users to locate and rectify faults as soon as possible. | Monitoring | If the alarms are not configured, the standard of container cluster performance cannot be established. When an exception occurs, you cannot receive alarms and will need to manually locate the fault. | + +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/umn/source/best_practice/cluster/adding_a_second_data_disk_to_a_node_in_a_cce_cluster.rst b/umn/source/best_practice/cluster/adding_a_second_data_disk_to_a_node_in_a_cce_cluster.rst new file mode 100644 index 0000000..a92488a --- /dev/null +++ b/umn/source/best_practice/cluster/adding_a_second_data_disk_to_a_node_in_a_cce_cluster.rst @@ -0,0 +1,103 @@ +:original_name: cce_bestpractice_00190.html + +.. 
_cce_bestpractice_00190: + +Adding a Second Data Disk to a Node in a CCE Cluster +==================================================== + +You can use the pre-installation script feature to configure CCE cluster nodes (ECSs). + +.. note:: + + - When creating a node in a cluster of v1.13.10 or later, if a data disk is not managed by LVM, follow instructions in this section to format the data disk before adding the disk. Otherwise, the data disk will still be managed by LVM. + - When creating a node in a cluster earlier than v1.13.10, you must format the data disks that are not managed by LVM. Otherwise, either these data disks or the first data disk will be managed by LVM. + +Before using this feature, write a script that can format data disks and save it to your OBS bucket. This script must be executed by user **root**. + +**Input Parameters** + +#. Set the script name to **formatdisk.sh**, save the script to your OBS bucket, and obtain the address of the script in OBS. +#. You need to specify the size of the Docker data disk (the data disk managed by LVM is called the Docker data disk). The size of the Docker disk must be different from that of the second disk. For example, the Docker data disk is 100 GB and the new disk is 110 GB. +#. Set the mount path of the second data disk, for example, **/data/code**. + +Run the following command in the pre-installation script to format the disk: + +.. code-block:: + + cd /tmp;curl -k -X GET OBS bucket address /formatdisk.sh -1 -O;fdisk -l;sleep 30;bash -x formatdisk.sh 100 /data/code;fdisk -l + +Example script (**formatdisk.sh**): + +.. code-block:: + + dockerdisksize=$1 + mountdir=$2 + systemdisksize=40 + i=0 + while [ 20 -gt $i ]; do + echo $i; + if [ $(lsblk -o KNAME,TYPE | grep disk | grep -v nvme | awk '{print $1}' | awk '{ print "/dev/"$1}' |wc -l) -ge 3 ]; then + break + else + sleep 5 + fi; + i=$[i+1] + done + all_devices=$(lsblk -o KNAME,TYPE | grep disk | grep -v nvme | awk '{print $1}' | awk '{ print "/dev/"$1}') + for device in ${all_devices[@]}; do + isRawDisk=$(lsblk -n $device 2>/dev/null | grep disk | wc -l) + if [[ ${isRawDisk} > 0 ]]; then + # is it partitioned ? + match=$(lsblk -n $device 2>/dev/null | grep -v disk | wc -l) + if [[ ${match} > 0 ]]; then + # already partited + [[ -n "${DOCKER_BLOCK_DEVICES}" ]] && echo "Raw disk ${device} has been partition, will skip this device" + continue + fi + else + isPart=$(lsblk -n $device 2>/dev/null | grep part | wc -l) + if [[ ${isPart} -ne 1 ]]; then + # not parted + [[ -n "${DOCKER_BLOCK_DEVICES}" ]] && echo "Disk ${device} has not been partition, will skip this device" + continue + fi + # is used ? + match=$(lsblk -n $device 2>/dev/null | grep -v part | wc -l) + if [[ ${match} > 0 ]]; then + # already used + [[ -n "${DOCKER_BLOCK_DEVICES}" ]] && echo "Disk ${device} has been used, will skip this device" + continue + fi + isMount=$(lsblk -n -o MOUNTPOINT $device 2>/dev/null) + if [[ -n ${isMount} ]]; then + # already used + [[ -n "${DOCKER_BLOCK_DEVICES}" ]] && echo "Disk ${device} has been used, will skip this device" + continue + fi + isLvm=$(sfdisk -lqL 2>>/dev/null | grep $device | grep "8e.*Linux LVM") + if [[ ! 
-n ${isLvm} ]]; then + # part system type is not Linux LVM + [[ -n "${DOCKER_BLOCK_DEVICES}" ]] && echo "Disk ${device} system type is not Linux LVM, will skip this device" + continue + fi + fi + block_devices_size=$(lsblk -n -o SIZE $device 2>/dev/null | awk '{ print $1}') + if [[ ${block_devices_size}"x" != "${dockerdisksize}Gx" ]] && [[ ${block_devices_size}"x" != "${systemdisksize}Gx" ]]; then + echo "n + p + 1 + + + w + " | fdisk $device + mkfs -t ext4 ${device}1 + mkdir -p $mountdir + uuid=$(blkid ${device}1 |awk '{print $2}') + echo "${uuid} $mountdir ext4 noatime 0 0" | tee -a /etc/fstab >/dev/null + mount $mountdir + fi + done + +.. note:: + + If the preceding example cannot be executed, use the dos2unix tool to convert the format. diff --git a/umn/source/best_practice/cluster/connecting_to_multiple_clusters_using_kubectl.rst b/umn/source/best_practice/cluster/connecting_to_multiple_clusters_using_kubectl.rst new file mode 100644 index 0000000..d701eb0 --- /dev/null +++ b/umn/source/best_practice/cluster/connecting_to_multiple_clusters_using_kubectl.rst @@ -0,0 +1,313 @@ +:original_name: cce_bestpractice_00254.html + +.. _cce_bestpractice_00254: + +Connecting to Multiple Clusters Using kubectl +============================================= + +Painpoint +--------- + +When you have multiple CCE clusters, you may find it difficult to efficiently connect to all of them. + +Solution +-------- + +This section describes how to configure access to multiple clusters by modifying **kubeconfig.json**. The file describes multiple clusters, users, and contexts. To access different clusters, run the **kubectl config use-context** command to switch between contexts. + + +.. figure:: /_static/images/en-us_image_0261820020.png + :alt: **Figure 1** Using kubectl to connect to multiple clusters + + **Figure 1** Using kubectl to connect to multiple clusters + +Prerequisites +------------- + +kubectl can access multiple clusters. + +Introduction to kubeconfig.json +------------------------------- + +kubeconfig.json is the configuration file of kubectl. You can download it on the cluster details page. + +|image1| + +The content of kubeconfig.json is as follows: + +.. code-block:: + + { + "kind": "Config", + "apiVersion": "v1", + "preferences": {}, + "clusters": [{ + "name": "internalCluster", + "cluster": { + "server": "https://192.168.0.85:5443", + "certificate-authority-data": "LS0tLS1CRUULIE..." + } + }, { + "name": "externalCluster", + "cluster": { + "server": "https://xxx.xxx.xxx.xxx:5443", + "insecure-skip-tls-verify": true + } + }], + "users": [{ + "name": "user", + "user": { + "client-certificate-data": "LS0tLS1CRUdJTiBDRVJ...", + "client-key-data": "LS0tLS1CRUdJTiBS..." + } + }], + "contexts": [{ + "name": "internal", + "context": { + "cluster": "internalCluster", + "user": "user" + } + }, { + "name": "external", + "context": { + "cluster": "externalCluster", + "user": "user" + } + }], + "current-context": "external" + } + +It mainly consists of three sections. + +- **clusters**: describes the cluster information, mainly the access address of the cluster. +- **users**: describes information about the users who access the cluster. It includes the **client-certificate-data** and **client-key-data** certificate files. +- **contexts**: describes the configuration contexts. You switch between contexts to access different clusters. A context is associated with **user** and **cluster**, that is, it defines which user accesses which cluster. 
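+
+As a quick, illustrative check of how these three sections map to kubectl behavior, you can inspect and switch them with the standard **kubectl config** subcommands. The sample output below assumes the example kubeconfig.json shown above and is only a sketch.
+
+.. code-block::
+
+    # List the contexts defined in the file. The asterisk marks the current context.
+    $ kubectl config get-contexts
+    CURRENT   NAME       CLUSTER           AUTHINFO   NAMESPACE
+              internal   internalCluster   user
+    *         external   externalCluster   user
+
+    # Switch to the context that uses the private network address.
+    $ kubectl config use-context internal
+    Switched to context "internal".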
+ +The preceding kubeconfig.json defines the private network address and public network address of the cluster as two clusters with two different contexts. You can switch the context to use different addresses to access the cluster. + +Configuring Access to Multiple Clusters +--------------------------------------- + +The following steps walk you through the procedure of configuring access to two clusters by modifying kubeconfig.json. + +This example configures only the public network access to the clusters. If you want to access multiple clusters over private networks, retain the **clusters** field and ensure that the clusters can be accessed over private networks. Its configuration is similar to that described in this example. + +#. Download kubeconfig.json of the two clusters and delete the lines related to private network access, as shown in the following figure. + + - Cluster A: + + .. code-block:: + + { + "kind": "Config", + "apiVersion": "v1", + "preferences": {}, + "clusters": [ { + "name": "externalCluster", + "cluster": { + "server": "https://119.xxx.xxx.xxx:5443", + "insecure-skip-tls-verify": true + } + }], + "users": [{ + "name": "user", + "user": { + "client-certificate-data": "LS0tLS1CRUdJTxM...", + "client-key-data": "LS0tLS1CRUdJTiB...." + } + }], + "contexts": [{ + "name": "external", + "context": { + "cluster": "externalCluster", + "user": "user" + } + }], + "current-context": "external" + } + + - Cluster B: + + .. code-block:: + + { + "kind": "Config", + "apiVersion": "v1", + "preferences": {}, + "clusters": [ { + "name": "externalCluster", + "cluster": { + "server": "https://124.xxx.xxx.xxx:5443", + "insecure-skip-tls-verify": true + } + }], + "users": [{ + "name": "user", + "user": { + "client-certificate-data": "LS0tLS1CRUdJTxM...", + "client-key-data": "LS0rTUideUdJTiB...." + } + }], + "contexts": [{ + "name": "external", + "context": { + "cluster": "externalCluster", + "user": "user" + } + }], + "current-context": "external" + } + + The preceding files have the same structure except that the **client-certificate-data** and **client-key-data** fields of **user** and the **clusters.cluster.server** field are different. + +#. Modify the **name** field as follows: + + - Cluster A: + + .. code-block:: + + { + "kind": "Config", + "apiVersion": "v1", + "preferences": {}, + "clusters": [ { + "name": "Cluster-A", + "cluster": { + "server": "https://119.xxx.xxx.xxx:5443", + "insecure-skip-tls-verify": true + } + }], + "users": [{ + "name": "Cluster-A-user", + "user": { + "client-certificate-data": "LS0tLS1CRUdJTxM...", + "client-key-data": "LS0tLS1CRUdJTiB...." + } + }], + "contexts": [{ + "name": "Cluster-A-Context", + "context": { + "cluster": "Cluster-A", + "user": "Cluster-A-user" + } + }], + "current-context": "Cluster-A-Context" + } + + - Cluster B: + + .. code-block:: + + { + "kind": "Config", + "apiVersion": "v1", + "preferences": {}, + "clusters": [ { + "name": "Cluster-B", + "cluster": { + "server": "https://124.xxx.xxx.xxx:5443", + "insecure-skip-tls-verify": true + } + }], + "users": [{ + "name": "Cluster-B-user", + "user": { + "client-certificate-data": "LS0tLS1CRUdJTxM...", + "client-key-data": "LS0rTUideUdJTiB...." + } + }], + "contexts": [{ + "name": "Cluster-B-Context", + "context": { + "cluster": "Cluster-B", + "user": "Cluster-B-user" + } + }], + "current-context": "Cluster-B-Context" + } + +#. Combine these two files. + + The file structure remains unchanged. Combine the contents of **clusters**, **users**, and **contexts** as follows: + + .. 
code-block:: + + { + "kind": "Config", + "apiVersion": "v1", + "preferences": {}, + "clusters": [ { + "name": "Cluster-A", + "cluster": { + "server": "https://119.xxx.xxx.xxx:5443", + "insecure-skip-tls-verify": true + } + }, + { + "name": "Cluster-B", + "cluster": { + "server": "https://124.xxx.xxx.xxx:5443", + "insecure-skip-tls-verify": true + } + }], + "users": [{ + "name": "Cluster-A-user", + "user": { + "client-certificate-data": "LS0tLS1CRUdJTxM...", + "client-key-data": "LS0tLS1CRUdJTiB...." + } + }, + { + "name": "Cluster-B-user", + "user": { + "client-certificate-data": "LS0tLS1CRUdJTxM...", + "client-key-data": "LS0rTUideUdJTiB...." + } + }], + "contexts": [{ + "name": "Cluster-A-Context", + "context": { + "cluster": "Cluster-A", + "user": "Cluster-A-user" + } + }, + { + "name": "Cluster-B-Context", + "context": { + "cluster": "Cluster-B", + "user": "Cluster-B-user" + } + }], + "current-context": "Cluster-A-Context" + } + +Verification +------------ + +Run the following commands to copy the file to the kubectl configuration path: + +**mkdir -p $HOME/.kube** + +**mv -f kubeconfig.json $HOME/.kube/config** + +Run the kubectl commands to check whether the two clusters can be connected. + +.. code-block:: + + # kubectl config use-context Cluster-A-Context + Switched to context "Cluster-A-Context". + # kubectl cluster-info + Kubernetes control plane is running at https://119.xxx.xxx.xxx:5443 + CoreDNS is running at https://119.xxx.xxx.xxx:5443/api/v1/namespaces/kube-system/services/coredns:dns/proxy + + To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'. + + # kubectl config use-context Cluster-B-Context + Switched to context "Cluster-B-Context". + # kubectl cluster-info + Kubernetes control plane is running at https://124.xxx.xxx.xxx:5443 + CoreDNS is running at https://124.xxx.xxx.xxx:5443/api/v1/namespaces/kube-system/services/coredns:dns/proxy + + To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'. + +.. |image1| image:: /_static/images/en-us_image_0000001274882416.png diff --git a/umn/source/best_practice/cluster/index.rst b/umn/source/best_practice/cluster/index.rst new file mode 100644 index 0000000..4d07a38 --- /dev/null +++ b/umn/source/best_practice/cluster/index.rst @@ -0,0 +1,16 @@ +:original_name: cce_bestpractice_0050.html + +.. _cce_bestpractice_0050: + +Cluster +======= + +- :ref:`Connecting to Multiple Clusters Using kubectl ` +- :ref:`Adding a Second Data Disk to a Node in a CCE Cluster ` + +.. toctree:: + :maxdepth: 1 + :hidden: + + connecting_to_multiple_clusters_using_kubectl + adding_a_second_data_disk_to_a_node_in_a_cce_cluster diff --git a/umn/source/best_practice/container/configuring_core_dumps.rst b/umn/source/best_practice/container/configuring_core_dumps.rst new file mode 100644 index 0000000..74c28dc --- /dev/null +++ b/umn/source/best_practice/container/configuring_core_dumps.rst @@ -0,0 +1,78 @@ +:original_name: cce_bestpractice_0325.html + +.. _cce_bestpractice_0325: + +Configuring Core Dumps +====================== + +Challenges +---------- + +Linux allows you to create a core dump file if an application crashes, which contains the data the application had in memory at the time of the crash. You can analyze the file to locate the fault. + +Generally, when a service application crashes, its container exits and is reclaimed and destroyed. Therefore, container core files need to be permanently stored on the host or cloud storage. This topic describes how to configure container core dumps. 
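+
+Before changing any node settings, it can be worth confirming that core dumps are not disabled by the core file size limit, because a limit of 0 suppresses core file generation entirely. The following quick check, run inside a container, is only an illustration; the actual output depends on your image and node configuration.
+
+.. code-block::
+
+    # 0 means core dumps are disabled; "unlimited" or a positive value allows them.
+    $ ulimit -c
+    unlimited
+
+    # The core pattern is a kernel-level setting shared by all containers on the node.
+    $ cat /proc/sys/kernel/core_pattern
+    core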
+ +Enabling Core Dump on a Node +---------------------------- + +Log in to the node, run the following command to enable core dump, and set the path and format for storing core files: + +**echo "/tmp/cores/core.%h.%e.%p.%t" > /proc/sys/kernel/core_pattern** + +Parameters: + +- **%h**: host name (or pod name). You are advised to configure this parameter. +- **%e**: program file name. You are advised to configure this parameter. +- **%p**: (optional) process ID. +- **%t**: (optional) time of the core dump. + +You can also configure a pre-installation or post-installation script to automatically run this command when creating a node. + +Permanently Storing Core Dumps +------------------------------ + +A core file can be stored in your host (using a hostPath volume) or cloud storage (using a PVC). The following is an example YAML file for using a hostPath volume. + +.. code-block:: + + apiVersion: v1 + kind: Pod + metadata: + name: coredump + spec: + volumes: + - name: coredump-path + hostPath: + path: /home/coredump + containers: + - name: ubuntu + image: ubuntu:12.04 + command: ["/bin/sleep","3600"] + volumeMounts: + - mountPath: /tmp/cores + name: coredump-path + +Create a pod using kubectl. + +**kubectl create -f pod.yaml** + +Verification +------------ + +After the pod is created, access the container and trigger a segmentation fault of the current shell terminal. + +.. code-block:: + + $ kubectl get pod + NAME READY STATUS RESTARTS AGE + coredump 1/1 Running 0 56s + $ kubectl exec -it coredump -- /bin/bash + root@coredump:/# kill -s SIGSEGV $$ + command terminated with exit code 139 + +Log in to the node and check whether a core file is generated in the **/home/coredump** directory. The following example indicates that a core file is generated. + +.. code-block:: + + # ls /home/coredump + core.coredump.bash.18.1650438992 diff --git a/umn/source/best_practice/container/how_do_i_select_a_container_runtime.rst b/umn/source/best_practice/container/how_do_i_select_a_container_runtime.rst new file mode 100644 index 0000000..ec4d746 --- /dev/null +++ b/umn/source/best_practice/container/how_do_i_select_a_container_runtime.rst @@ -0,0 +1,104 @@ +:original_name: cce_bestpractice_0140.html + +.. _cce_bestpractice_0140: + +How Do I Select a Container Runtime? +==================================== + +containerd vs Docker +-------------------- + +Container runtime, one of the most important components of Kubernetes, manages the lifecycle of images and containers. kubelet interacts with a container runtime through the Container Runtime Interface (CRI). + +CCE supports containerd and Docker as your runtime. **containerd is recommended for its shorter traces, fewer components, and stability.** + +Select Docker for the following scenarios: + +- Docker-in-Docker needs to be used. +- Commands such as **docker build/push/save/load** need to be run on the CCE node. +- Docker APIs need to be called. +- Docker Compose or Docker Swarm needs to be used. + +Common Commands of containerd and Docker +---------------------------------------- + +containerd does not support Docker APIs and Docker CLI, but you can run crictl commands to implement similar functions. + +.. table:: **Table 1** Image-related commands + + +-----+---------------------------------------------------+---------------------------------------------------+-----------------------+ + | No. 
| Docker Command | containerd Command | Remarks | + +=====+===================================================+===================================================+=======================+ + | 1 | docker images [Option] [Image name[:Tag]] | crictl images [Option] [Image name[:Tag]] | List local images. | + +-----+---------------------------------------------------+---------------------------------------------------+-----------------------+ + | 2 | docker pull [Option] *Image name*\ [:Tag|@DIGEST] | crictl pull [Option] *Image name*\ [:Tag|@DIGEST] | Pull images. | + +-----+---------------------------------------------------+---------------------------------------------------+-----------------------+ + | 3 | docker push | None | Push an image. | + +-----+---------------------------------------------------+---------------------------------------------------+-----------------------+ + | 4 | docker rmi [Option] *Image*... | crictl rmi [Option] *Image ID*... | Delete a local image. | + +-----+---------------------------------------------------+---------------------------------------------------+-----------------------+ + | 5 | docker inspect *Image ID* | crictl inspect *Image ID* | Check a container. | + +-----+---------------------------------------------------+---------------------------------------------------+-----------------------+ + +.. table:: **Table 2** Container-related commands + + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | No. | Docker Command | containerd Command | Remarks | + +=====+========================================================================+========================================================================+============================================+ + | 1 | docker ps [Option] | crictl ps [Option] | List containers. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 2 | docker create [Option] | crictl create [Option] | Create a container. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 3 | docker start [Option] *Container ID*... | crictl start [Option] *Container ID*... | Start a container. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 4 | docker stop [Option] *Container ID*... | crictl stop [Option] *Container ID*... | Stop a container. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 5 | docker rm [Option] *Container ID*... | crictl rm [Option] *Container ID*... | Delete a container. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 6 | docker attach [Option] *Container ID* | crictl attach [Option] *Container ID* | Connect to a container. 
| + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 7 | docker exec [Option] *Container ID* *Startup command* [*Parameter*...] | crictl exec [Option] *Container ID* *Startup command* [*Parameter*...] | Access the container. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 8 | docker inspect [Option] *Container name*\ \|\ *ID*... | crictl inspect [Option] *Container ID*... | Query container details. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 9 | docker logs [Option] *Container ID* | crictl logs [Option] *Container ID* | View container logs. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 10 | docker stats [Option] *Container ID*... | crictl stats [Option] *Container ID* | Check the resource usage of the container. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + | 11 | docker update [Option] *Container ID*... | crictl update [Option] *Container ID*... | Update container resource limits. | + +-----+------------------------------------------------------------------------+------------------------------------------------------------------------+--------------------------------------------+ + +.. table:: **Table 3** Pod-related commands + + +-----+----------------+--------------------------------------+-------------------+ + | No. | Docker Command | containerd Command | Remarks | + +=====+================+======================================+===================+ + | 1 | None | crictl pods [Option] | List pods. | + +-----+----------------+--------------------------------------+-------------------+ + | 2 | None | crictl inspectp [Option] *Pod ID*... | View pod details. | + +-----+----------------+--------------------------------------+-------------------+ + | 3 | None | crictl start [Option] *Pod ID*... | Start a pod. | + +-----+----------------+--------------------------------------+-------------------+ + | 4 | None | crictl runp [Option] *Pod ID*... | Run a pod. | + +-----+----------------+--------------------------------------+-------------------+ + | 5 | None | crictl stopp [Option] *Pod ID*... | Stop a pod. | + +-----+----------------+--------------------------------------+-------------------+ + | 6 | None | crictl rmp [Option] *Pod ID*... | Delete a pod. | + +-----+----------------+--------------------------------------+-------------------+ + +.. note:: + + Containers created and started by containerd are immediately deleted by kubelet. containerd does not support suspending, resuming, restarting, renaming, and waiting for containers, nor Docker image build, import, export, comparison, push, search, and labeling. containerd does not support file copy. You can log in to the image repository by modifying the configuration file of containerd. 
+ +Differences in Tracing +---------------------- + +- Docker: + + kubelet --> docker shim (in the kubelet process) --> dockerd --> containerd + +- containerd: + + kubelet --> cri plugin (in the containerd process) --> containerd + +Although Docker has added functions such as swarm cluster, docker build, and Docker APIs, it also introduces bugs. Compared with containerd, Docker has one more layer of calling. diff --git a/umn/source/best_practice/container/index.rst b/umn/source/best_practice/container/index.rst new file mode 100644 index 0000000..e80fb24 --- /dev/null +++ b/umn/source/best_practice/container/index.rst @@ -0,0 +1,24 @@ +:original_name: cce_bestpractice_0051.html + +.. _cce_bestpractice_0051: + +Container +========= + +- :ref:`Properly Allocating Container Computing Resources ` +- :ref:`Modifying Kernel Parameters Using a Privileged Container ` +- :ref:`Initializing a Container ` +- :ref:`Using hostAliases to Configure /etc/hosts in a Pod ` +- :ref:`How Do I Select a Container Runtime? ` +- :ref:`Configuring Core Dumps ` + +.. toctree:: + :maxdepth: 1 + :hidden: + + properly_allocating_container_computing_resources + modifying_kernel_parameters_using_a_privileged_container + initializing_a_container + using_hostaliases_to_configure_etc_hosts_in_a_pod + how_do_i_select_a_container_runtime + configuring_core_dumps diff --git a/umn/source/best_practice/container/initializing_a_container.rst b/umn/source/best_practice/container/initializing_a_container.rst new file mode 100644 index 0000000..cd62c30 --- /dev/null +++ b/umn/source/best_practice/container/initializing_a_container.rst @@ -0,0 +1,90 @@ +:original_name: cce_bestpractice_00228.html + +.. _cce_bestpractice_00228: + +Initializing a Container +======================== + +Concepts +-------- + +Before containers running applications are started, one or some init containers are started first. If there are multiple init containers, they will be started in the defined sequence. The application containers are started only after all init containers run to completion and exit. Storage volumes in a pod are shared. Therefore, the data generated in the init containers can be used by the application containers. + +Init containers can be used in multiple Kubernetes resources, such as Deployments, DaemonSets, and jobs. They perform initialization before application containers are started. + +Scenario +-------- + +Before deploying a service, you can use an init container to make preparations before the pod where the service is running is deployed. After the preparations are complete, the init container runs to completion and exit, and the container to be deployed will be started. + +- **Scenario 1: Wait for other modules to be ready.** For example, an application contains two containerized services: web server and database. The web server service needs to access the database service. However, when the application is started, the database service may have not been started. Therefore, web server may fail to access database. To solve this problem, you can use an init container in the pod where web server is running to check whether database is ready. The init container runs to completion only when database is accessible. Then, web server is started and initiates a formal access request to database. +- **Scenario 2: Initialize the configuration.** For example, the init container can check all existing member nodes in the cluster and prepare the cluster configuration information for the application container. 
After the application container is started, it can be added to the cluster using the configuration information. +- **Other scenarios**: For example, register a pod with a central database and download application dependencies. + +For details, see `Init Containers `__. + +Procedure +--------- + +#. Edit the YAML file of the init container workload. + + **vi deployment.yaml** + + An example YAML file is provided as follows: + + .. code-block:: + + apiVersion: apps/v1 + kind: Deployment + metadata: + name: mysql + spec: + replicas: 1 + selector: + matchLabels: + name: mysql + template: + metadata: + labels: + name: mysql + spec: + initContainers: + - name: getresource + image: busybox + command: ['sleep 20'] + containers: + - name: mysql + image: percona:5.7.22 + imagePullPolicy: Always + ports: + - containerPort: 3306 + resources: + limits: + memory: "500Mi" + cpu: "500m" + requests: + memory: "500Mi" + cpu: "250m" + env: + - name: MYSQL_ROOT_PASSWORD + value: "mysql" + +#. Create an init container workload. + + **kubectl create -f deployment.yaml** + + Information similar to the following is displayed: + + .. code-block:: + + deployment.apps/mysql created + +#. Query the created Docker container on the node where the workload is running. + + **docker ps -a|grep mysql** + + The init container will exit after it runs to completion. The query result **Exited (0)** shows the exit status of the init container. + + |image1| + +.. |image1| image:: /_static/images/en-us_image_0261818867.png diff --git a/umn/source/best_practice/container/modifying_kernel_parameters_using_a_privileged_container.rst b/umn/source/best_practice/container/modifying_kernel_parameters_using_a_privileged_container.rst new file mode 100644 index 0000000..8876287 --- /dev/null +++ b/umn/source/best_practice/container/modifying_kernel_parameters_using_a_privileged_container.rst @@ -0,0 +1,116 @@ +:original_name: cce_bestpractice_00227.html + +.. _cce_bestpractice_00227: + +Modifying Kernel Parameters Using a Privileged Container +======================================================== + +Prerequisites +------------- + +To access a Kubernetes cluster from a client, you can use the Kubernetes command line tool kubectl. + +Procedure +--------- + +#. Create a DaemonSet in the background, select the Nginx image, enable the Privileged Container, configure the lifecycle, and add the **hostNetwork** field (value: **true**). + + a. Create a DaemonSet file. + + **vi daemonSet.yaml** + + An example YAML file is provided as follows: + + .. important:: + + The **spec.spec.containers.lifecycle** field indicates the command that will be run after the container is started. + + .. code-block:: + + kind: DaemonSet + apiVersion: apps/v1 + metadata: + name: daemonset-test + labels: + name: daemonset-test + spec: + selector: + matchLabels: + name: daemonset-test + template: + metadata: + labels: + name: daemonset-test + spec: + hostNetwork: true + containers: + - name: daemonset-test + image: nginx:alpine-perl + command: + - "/bin/sh" + args: + - "-c" + - while :; do time=$(date);done + imagePullPolicy: IfNotPresent + lifecycle: + postStart: + exec: + command: + - sysctl + - "-w" + - net.ipv4.tcp_tw_reuse=1 + securityContext: + privileged: true + imagePullSecrets: + - name: default-secret + + b. Create a DaemonSet. + + **kubectl create -f daemonSet.yaml** + +#. Check whether the DaemonSet is successfully created. 
+ + **kubectl get daemonset** *DaemonSet name* + + In this example, run the following command: + + **kubectl get daemonset** daemonset-test + + Information similar to the following is displayed: + + .. code-block:: + + NAME DESIRED CURRENT READY UP-T0-DATE AVAILABLE NODE SELECTOR AGE + daemonset-test 2 2 2 2 2 2h + +#. Query the container ID of DaemonSet on the node. + + **docker ps -a|grep** *DaemonSet name* + + In this example, run the following command: + + **docker ps -a|grep** daemonset-test + + Information similar to the following is displayed: + + .. code-block:: + + 897b99faa9ce 3e094d5696c1 "/bin/sh -c while..." 31 minutes ago Up 30 minutes ault_fa7cc313-4ac1-11e9-a716-fa163e0aalba_0 + +#. Access the container. + + **docker exec -it** *containerid* **/bin/sh** + + In this example, run the following command: + + **docker exec -it** *897b99faa9ce* **/bin/sh** + +#. Check whether the configured command is executed after the container is started. + + **sysctl -a \|grep net.ipv4.tcp_tw_reuse** + + If the following information is displayed, the system parameters are modified successfully: + + .. code-block:: + + net.ipv4.tcp_tw_reuse=1 diff --git a/umn/source/best_practice/container/properly_allocating_container_computing_resources.rst b/umn/source/best_practice/container/properly_allocating_container_computing_resources.rst new file mode 100644 index 0000000..d1ffb3f --- /dev/null +++ b/umn/source/best_practice/container/properly_allocating_container_computing_resources.rst @@ -0,0 +1,125 @@ +:original_name: cce_bestpractice_00002.html + +.. _cce_bestpractice_00002: + +Properly Allocating Container Computing Resources +================================================= + +If a node has sufficient memory resources, a container on this node can use more memory resources than requested, but no more than limited. If the memory allocated to a container exceeds the upper limit, the container is stopped first. If the container continuously uses memory resources more than limited, the container is terminated. If a stopped container is allowed to be restarted, kubelet will restart it, but other types of run errors will occur. + +Scenario 1 +---------- + +The node's memory has reached the memory limit reserved for the node. As a result, OOM killer is triggered. + +**Solution** + +You can either scale up the node or migrate the pods on the node to other nodes. + +Scenario 2 +---------- + +The upper limit of resources configured for the pod is too small. When the actual usage exceeds the limit, OOM killer is triggered. + +**Solution** + +Set a higher upper limit for the workload. + +Example +------- + +A pod will be created and allocated memory that exceeds the limit. As shown in the following configuration file of the pod, the pod requests 50 MB memory and the memory limit is set to 100 MB. + +Example YAML file (memory-request-limit-2.yaml): + +.. code-block:: + + apiVersion: v1 + kind: Pod + metadata: + name: memory-demo-2 + spec: + containers: + - name: memory-demo-2-ctr + image: vish/stress + resources: + requests: + memory: 50Mi + limits: + memory: "100Mi" + args: + - -mem-total + - 250Mi + - -mem-alloc-size + - 10Mi + - -mem-alloc-sleep + - 1s + +The **args** parameters indicate that the container attempts to request 250 MB memory, which exceeds the pod's upper limit (100 MB). + +Creating a pod: + +.. code-block:: + + kubectl create -f https://k8s.io/docs/tasks/configure-pod-container/memory-request-limit-2.yaml --namespace=mem-example + +Viewing the details about the pod: + +.. 
code-block:: + + kubectl get pod memory-demo-2 --namespace=mem-example + +In this stage, the container may be running or be killed. If the container is not killed, repeat the previous command until the container is killed. + +.. code-block:: + + NAME READY STATUS RESTARTS AGE + memory-demo-2 0/1 OOMKilled 1 24s + +Viewing detailed information about the container: + +.. code-block:: + + kubectl get pod memory-demo-2 --output=yaml --namespace=mem-example + +This output indicates that the container is killed because the memory limit is exceeded. + +.. code-block:: + + lastState: + terminated: + containerID: docker://7aae52677a4542917c23b10fb56fcb2434c2e8427bc956065183c1879cc0dbd2 + exitCode: 137 + finishedAt: 2020-02-20T17:35:12Z + reason: OOMKilled + startedAt: null + +In this example, the container can be automatically restarted. Therefore, kubelet will start it again. You can run the following command several times to see how the container is killed and started: + +.. code-block:: + + kubectl get pod memory-demo-2 --namespace=mem-example + +The preceding command output indicates how the container is killed and started back and forth: + +.. code-block:: + + $ kubectl get pod memory-demo-2 --namespace=mem-example + NAME READY STATUS RESTARTS AGE + memory-demo-2 0/1 OOMKilled 1 37s + $ kubectl get pod memory-demo-2 --namespace=mem-example + NAME READY STATUS RESTARTS AGE + memory-demo-2 1/1 Running 2 40s + +Viewing the historical information of the pod: + +.. code-block:: + + kubectl describe pod memory-demo-2 --namespace=mem-example + +The following command output indicates that the pod is repeatedly killed and started. + +.. code-block:: + + ... Normal Created Created container with id 66a3a20aa7980e61be4922780bf9d24d1a1d8b7395c09861225b0eba1b1f8511 + ... Warning BackOff Back-off restarting failed container diff --git a/umn/source/best_practice/container/using_hostaliases_to_configure_etc_hosts_in_a_pod.rst b/umn/source/best_practice/container/using_hostaliases_to_configure_etc_hosts_in_a_pod.rst new file mode 100644 index 0000000..f7f1718 --- /dev/null +++ b/umn/source/best_practice/container/using_hostaliases_to_configure_etc_hosts_in_a_pod.rst @@ -0,0 +1,121 @@ +:original_name: cce_bestpractice_00226.html + +.. _cce_bestpractice_00226: + +Using hostAliases to Configure /etc/hosts in a Pod +================================================== + +Scenario +-------- + +If DNS or other related settings are inappropriate, you can use **hostAliases** to overwrite the resolution of the host name at the pod level when adding entries to the **/etc/hosts** file of the pod. + +Procedure +--------- + +#. Use kubectl to connect to the cluster. + +#. Create the **hostaliases-pod.yaml** file. + + **vi hostaliases-pod.yaml** + + The field in bold in the YAML file indicates the image name and tag. You can replace the example value as required. + + .. code-block:: + + apiVersion: v1 + kind: Pod + metadata: + name: hostaliases-pod + spec: + hostAliases: + - ip: 127.0.0.1 + hostnames: + - foo.local + - bar.local + - ip: 10.1.2.3 + hostnames: + - foo.remote + - bar.remote + containers: + - name: cat-hosts + image: tomcat:9-jre11-slim + lifecycle: + postStart: + exec: + command: + - cat + - /etc/hosts + imagePullSecrets: + - name: default-secret + + .. 
table:: **Table 1** pod field description + + +------------+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Parameter | Mandatory/Optional | Description | + +============+====================+============================================================================================================================================================+ + | apiVersion | Mandatory | API version number | + +------------+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | kind | Mandatory | Type of the object to be created | + +------------+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | metadata | Mandatory | Metadata definition of a resource object | + +------------+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | name | Mandatory | Name of a pod | + +------------+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | spec | Mandatory | Detailed description of the pod. For details, see :ref:`Table 2 `. | + +------------+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + .. _cce_bestpractice_00226__en-us_topic_0226102200_en-us_topic_0179003345_table33531919193: + + .. table:: **Table 2** spec field description + + +-------------+--------------------+----------------------------------------------------------------------------------------------------------------------------+ + | Parameter | Mandatory/Optional | Description | + +=============+====================+============================================================================================================================+ + | hostAliases | Mandatory | Host alias | + +-------------+--------------------+----------------------------------------------------------------------------------------------------------------------------+ + | containers | Mandatory | For details, see :ref:`Table 3 `. | + +-------------+--------------------+----------------------------------------------------------------------------------------------------------------------------+ + + .. _cce_bestpractice_00226__en-us_topic_0226102200_en-us_topic_0179003345_table196127172016: + + .. table:: **Table 3** containers field description + + ========= ================== ==================== + Parameter Mandatory/Optional Description + ========= ================== ==================== + name Mandatory Container name + image Mandatory Container image name + lifecycle Optional Lifecycle + ========= ================== ==================== + +#. Create a pod. + + **kubectl create -f hostaliases-pod.yaml** + + If information similar to the following is displayed, the pod is created. + + .. code-block:: + + pod/hostaliases-pod created + +#. Query the pod status. + + **kubectl get pod hostaliases-pod** + + If the pod is in the **Running** state, the pod is successfully created. + + .. 
code-block:: + + NAME READY STATUS RESTARTS AGE + hostaliases-pod 1/1 Running 0 16m + +#. Check whether the **hostAliases** functions properly. + + **docker ps \|grep hostaliases-pod** + + **docker exec -ti Container ID /bin/sh** + + |image1| + +.. |image1| image:: /_static/images/en-us_image_0278498565.png diff --git a/umn/source/best_practice/devops/index.rst b/umn/source/best_practice/devops/index.rst new file mode 100644 index 0000000..f84d128 --- /dev/null +++ b/umn/source/best_practice/devops/index.rst @@ -0,0 +1,14 @@ +:original_name: cce_bestpractice_0322.html + +.. _cce_bestpractice_0322: + +DevOps +====== + +- :ref:`Interconnecting GitLab with SWR and CCE for CI/CD ` + +.. toctree:: + :maxdepth: 1 + :hidden: + + interconnecting_gitlab_with_swr_and_cce_for_ci_cd diff --git a/umn/source/best_practice/devops/interconnecting_gitlab_with_swr_and_cce_for_ci_cd.rst b/umn/source/best_practice/devops/interconnecting_gitlab_with_swr_and_cce_for_ci_cd.rst new file mode 100644 index 0000000..4a1c088 --- /dev/null +++ b/umn/source/best_practice/devops/interconnecting_gitlab_with_swr_and_cce_for_ci_cd.rst @@ -0,0 +1,191 @@ +:original_name: cce_bestpractice_0324.html + +.. _cce_bestpractice_0324: + +Interconnecting GitLab with SWR and CCE for CI/CD +================================================= + +Challenges +---------- + +GitLab is an open-source version management system developed with Ruby on Rails for Git project repository management. It supports web-based access to public and private projects. Similar to GitHub, GitLab allows you to browse source code, manage bugs and comments, and control team member access to repositories. You will find it very easy to view committed versions and file history database. Team members can communicate with each other using the built-in chat program (Wall). + +GitLab provides powerful CI/CD functions and is widely used in software development. + + +.. figure:: /_static/images/en-us_image_0000001291567729.png + :alt: **Figure 1** GitLab CI/CD process + + **Figure 1** GitLab CI/CD process + +This section describes how to interconnect GitLab with SWR and CCE for CI/CD. + +Preparations +------------ + +#. Create a CCE cluster and a node and bind an EIP to the node for downloading an image during GitLab Runner installation. +#. Download and configure kubectl to connect to the cluster. +#. `Install Helm 3 `__. + +Installing GitLab Runner +------------------------ + +Log in to `GitLab `__, choose **Settings** > **CI/CD** in the project view, click **Expand** next to **Runners**, and search for the GitLab Runner registration URL and token. + +|image1| + +Create the **values.yaml** file and fill in the following information: + +.. code-block:: + + # Registration URL + gitlabUrl: https://gitlab.com/ + # Registration token + runnerRegistrationToken: "GR13489411dKVzmTyaywEDTF_1QXb" + rbac: + create: true + runners: + privileged: true + +Create a GitLab namespace. + +.. code-block:: + + kubectl create namespace gitlab + +Install GitLab Runner using Helm. + +.. code-block:: + + helm repo add gitlab https://charts.gitlab.io + helm install --namespace gitlab gitlab-runner -f values.yaml gitlab/gitlab-runner + +After the installation, you can query the workload of gitlab-runner on the CCE console and view the connection information in GitLab later. + +|image2| + +Creating an Application +----------------------- + +Place the application to be created in the GitLab project repository. This section takes Nginx modification as an example. 
For details, visit https://gitlab.com/c8147/cidemo/-/tree/main. + +The following files are included: + +- **.gitlab-ci.yml**: Gitlab CI file, which will be described in detail in :ref:`Creating a Pipeline `. +- **Dockerfile**: used to build Docker images. +- **index.html**: used to replace the index page of Nginx. +- **k8s.yaml**: used to deploy the Nginx app. A Deployment named **nginx-test** and a Service named **nginx-test** will be created. + +The preceding files are only examples. You can replace or modify them accordingly. + +Configuring Global Variables +---------------------------- + +When using pipelines, you need to build an image, upload it to SWR, and run kubectl commands to deploy the image in the cluster. Before performing these operations, you must log in to SWR and obtain the credential for connecting to the cluster. You can define the information as variables in GitLab. + +Log in to `GitLab `__, choose **Settings** > **CI/CD** in the project view, and click **Expand** next to **Variables** to add variables. + +|image3| + +- **kube_config** + + **kubeconfig.json** file used for kubectl command authentication. Run the following command on the host where kubectl is configured to convert the file to the Base64 format: + + **echo $(cat ~/.kube/config \| base64) \| tr -d " "** + + The command output is the content of **kubeconfig.json**. + +- **project**: project name. + + Log in to the management console, click your username in the upper right corner, and click **My Credentials**. In the **Projects** area on the **API Credentials** page, check the name of the project in your current region. + +- **swr_ak**: access key. + + Log in to the management console, click your username in the upper right corner, and click **My Credentials**. In the navigation pane on the left, choose **Access Keys**. Click **Create Access Key**, enter the description, and click **OK**. In the displayed **Information** dialog box, click **Download**. After the certificate is downloaded, obtain the AK and SK information from the **credentials** file. + +- **swr_sk**: secret key for logging in to SWR. + + Run the following command to obtain the key pair. Replace *$AK* and *$SK* with the AK and SK obtained in the preceding steps. + + **printf "$AK" \| openssl dgst -binary -sha256 -hmac "$SK" \| od -An -vtx1 \| sed 's/[ \\n]//g' \| sed 'N;s/\\n//'** + + The command output displays the login key pair. + +.. _cce_bestpractice_0324__section171541431101910: + +Creating a Pipeline +------------------- + +Log in to `Gitlab `__ and add the **.gitlab-ci.yml** file to **Repository**. + +|image4| + +The content is as follows: + +.. code-block:: + + # Define pipeline stages, including package, build, and deploy. + stages: + - package + - build + - deploy + # If no image is specified in each stage, the default image docker:latest is used. + image: docker:latest + # In the package stage, only printing is performed. + package: + stage: package + script: + - echo "package" + # In the build stage, the Docker-in-Docker mode is used. + build: + stage: build + # Define environment variables for the build stage. + variables: + DOCKER_HOST: tcp://docker:2375 + # Define the image for running Docker-in-Docker. + services: + - docker:18.09-dind + script: + - echo "build" + # Log in to SWR. + - docker login -u $project@$swr_ak -p $swr_sk swr.eu-de.otc.t-systems.com + # Build an image. k8s-dev is the organization name in SWR. Replace it to the actual name. + - docker build -t swr.eu-de.otc.t-systems.com/k8s-dev/nginx:$CI_PIPELINE_ID . 
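        # Note (added for clarity): $CI_PIPELINE_ID is a predefined GitLab CI/CD variable,
        # so each pipeline run produces an image with a unique tag.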
+ # Push the image to SWR. + - docker push swr.eu-de.otc.t-systems.com/k8s-dev/nginx:$CI_PIPELINE_ID + deploy: + # Use the kubectl image. + image: + name: bitnami/kubectl:latest + entrypoint: [""] + stage: deploy + script: + # Configure the kubeconfig file. + - echo $kube_config |base64 -d > $KUBECONFIG + # Replace the image in the k8s.yaml file. + - sed -i "s//swr.eu-de.otc.t-systems.com\/k8s-dev\/nginx:$CI_PIPELINE_ID/g" k8s.yaml + - cat k8s.yaml + # Deploy an application. + - kubectl apply -f k8s.yaml + +After the **.gitlab-ci.yml** file is saved, the pipeline is started immediately. You can view the pipeline execution status in GitLab. + +|image5| + +Verifying Deployment +-------------------- + +After the pipeline is deployed, locate the **nginx-test** Service on the CCE console, query its access address, and run the **curl** command to access the Service. + +.. code-block:: + + # curl xxx.xxx.xxx.xxx:31111 + Hello Gitlab! + +If the preceding information is displayed, the deployment is correct. + +.. |image1| image:: /_static/images/en-us_image_0000001238489436.png +.. |image2| image:: /_static/images/en-us_image_0000001283301301.png +.. |image3| image:: /_static/images/en-us_image_0000001238903330.png +.. |image4| image:: /_static/images/en-us_image_0000001238830246.png +.. |image5| image:: /_static/images/en-us_image_0000001283343269.png diff --git a/umn/source/best_practice/disaster_recovery/implementing_high_availability_for_containers_in_cce.rst b/umn/source/best_practice/disaster_recovery/implementing_high_availability_for_containers_in_cce.rst new file mode 100644 index 0000000..8b7cd14 --- /dev/null +++ b/umn/source/best_practice/disaster_recovery/implementing_high_availability_for_containers_in_cce.rst @@ -0,0 +1,134 @@ +:original_name: cce_bestpractice_00220.html + +.. _cce_bestpractice_00220: + +Implementing High Availability for Containers in CCE +==================================================== + +Basic Principles +---------------- + +To achieve high availability for your CCE containers, you can do as follows: + +#. Deploy three master nodes for the cluster. +#. When nodes are deployed across AZs, set custom scheduling policies based on site requirements to maximize resource utilization. +#. Create multiple node pools in different AZs and use them for node scaling. +#. Set the number of pods to be greater than 2 when creating a workload. +#. Set pod affinity rules to distribute pods to different AZs and nodes. + +Procedure +--------- + +Assume that there are four nodes in a cluster distributed in the following AZs: + +.. code-block:: + + $ kubectl get node -L topology.kubernetes.io/zone,kubernetes.io/hostname + NAME STATUS ROLES AGE VERSION ZONE HOSTNAME + 192.168.5.112 Ready 42m v1.21.7-r0-CCE21.11.1.B007 eu-de-01 192.168.5.112 + 192.168.5.179 Ready 42m v1.21.7-r0-CCE21.11.1.B007 eu-de-01 192.168.5.179 + 192.168.5.252 Ready 37m v1.21.7-r0-CCE21.11.1.B007 eu-de-02 192.168.5.252 + 192.168.5.8 Ready 33h v1.21.7-r0-CCE21.11.1.B007 eu-de-03 192.168.5.8 + +Create workloads according to the following two podAntiAffinity rules: + +- The first one is the pod anti-affinity in an AZ. Set the parameters as follows: + + - **weight**: A larger weight value indicates a higher priority. In this example, set it to **50**. + - **topologyKey**: a default or custom key for the node label that the system uses to denote a topology domain. A topology key determines the scope where the pod should be scheduled to. 
In this example, set this parameter to **topology.kubernetes.io/zone**, which is the label for identifying the AZ where the node is located. + - **labelSelector**: Select the label of the workload to realize the anti-affinity between this container and the workload. + +- The second one is the pod anti-affinity in the node host name. Set the parameters as follows: + + - **weight**: Set it to **50**. + - **topologyKey**: Set it to **kubernetes.io/hostname**. + - **labelSelector**: Select the label of the pod, which is anti-affinity with the pod. + +.. code-block:: + + kind: Deployment + apiVersion: apps/v1 + metadata: + name: nginx + namespace: default + spec: + replicas: 2 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: container-0 + image: nginx:alpine + resources: + limits: + cpu: 250m + memory: 512Mi + requests: + cpu: 250m + memory: 512Mi + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + podAffinityTerm: + labelSelector: # Select the label of the workload to realize the anti-affinity between this container and the workload. + matchExpressions: + - key: app + operator: In + values: + - nginx + namespaces: + - default + topologyKey: topology.kubernetes.io/zone # It takes effect in the same AZ. + - weight: 50 + podAffinityTerm: + labelSelector: # Select the label of the workload to realize the anti-affinity between this container and the workload. + matchExpressions: + - key: app + operator: In + values: + - nginx + namespaces: + - default + topologyKey: kubernetes.io/hostname # It takes effect on the node. + imagePullSecrets: + - name: default-secret + +Create a workload and view the node where the pod is located. + +.. code-block:: + + $ kubectl get pod -owide + NAME READY STATUS RESTARTS AGE IP NODE + nginx-6fffd8d664-dpwbk 1/1 Running 0 17s 10.0.0.132 192.168.5.112 + nginx-6fffd8d664-qhclc 1/1 Running 0 17s 10.0.1.133 192.168.5.252 + +Increase the number of pods to 3. The pod is scheduled to another node, and the three nodes are in three different AZs. + +.. code-block:: + + $ kubectl scale --replicas=3 deploy/nginx + deployment.apps/nginx scaled + $ kubectl get pod -owide + NAME READY STATUS RESTARTS AGE IP NODE + nginx-6fffd8d664-8t7rv 1/1 Running 0 3s 10.0.0.9 192.168.5.8 + nginx-6fffd8d664-dpwbk 1/1 Running 0 2m45s 10.0.0.132 192.168.5.112 + nginx-6fffd8d664-qhclc 1/1 Running 0 2m45s 10.0.1.133 192.168.5.252 + +Increase the number of pods to 4. The pod is scheduled to the last node. With podAntiAffinity rules, pods can be evenly distributed to AZs and nodes. + +.. code-block:: + + $ kubectl scale --replicas=4 deploy/nginx + deployment.apps/nginx scaled + $ kubectl get pod -owide + NAME READY STATUS RESTARTS AGE IP NODE + nginx-6fffd8d664-8t7rv 1/1 Running 0 2m30s 10.0.0.9 192.168.5.8 + nginx-6fffd8d664-dpwbk 1/1 Running 0 5m12s 10.0.0.132 192.168.5.112 + nginx-6fffd8d664-h796b 1/1 Running 0 78s 10.0.1.5 192.168.5.179 + nginx-6fffd8d664-qhclc 1/1 Running 0 5m12s 10.0.1.133 192.168.5.252 diff --git a/umn/source/best_practice/disaster_recovery/index.rst b/umn/source/best_practice/disaster_recovery/index.rst new file mode 100644 index 0000000..536b351 --- /dev/null +++ b/umn/source/best_practice/disaster_recovery/index.rst @@ -0,0 +1,14 @@ +:original_name: cce_bestpractice_0323.html + +.. _cce_bestpractice_0323: + +Disaster Recovery +================= + +- :ref:`Implementing High Availability for Containers in CCE ` + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + implementing_high_availability_for_containers_in_cce diff --git a/umn/source/best_practice/index.rst b/umn/source/best_practice/index.rst new file mode 100644 index 0000000..5b5e2de --- /dev/null +++ b/umn/source/best_practice/index.rst @@ -0,0 +1,32 @@ +:original_name: cce_bestpractice.html + +.. _cce_bestpractice: + +Best Practice +============= + +- :ref:`Checklist for Deploying Containerized Applications in the Cloud ` +- :ref:`Migration ` +- :ref:`DevOps ` +- :ref:`Disaster Recovery ` +- :ref:`Security ` +- :ref:`Auto Scaling ` +- :ref:`Cluster ` +- :ref:`Networking ` +- :ref:`Storage ` +- :ref:`Container ` + +.. toctree:: + :maxdepth: 1 + :hidden: + + checklist_for_deploying_containerized_applications_in_the_cloud + migration/index + devops/index + disaster_recovery/index + security/index + auto_scaling/index + cluster/index + networking/index + storage/index + container/index diff --git a/umn/source/best_practice/migration/index.rst b/umn/source/best_practice/migration/index.rst new file mode 100644 index 0000000..011cb32 --- /dev/null +++ b/umn/source/best_practice/migration/index.rst @@ -0,0 +1,14 @@ +:original_name: cce_bestpractice_00237.html + +.. _cce_bestpractice_00237: + +Migration +========= + +- :ref:`Migrating On-premises Kubernetes Clusters to CCE ` + +.. toctree:: + :maxdepth: 1 + :hidden: + + migrating_on-premises_kubernetes_clusters_to_cce/index diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/index.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/index.rst new file mode 100644 index 0000000..67c6583 --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/index.rst @@ -0,0 +1,28 @@ +:original_name: cce_bestpractice_0306.html + +.. _cce_bestpractice_0306: + +Migrating On-premises Kubernetes Clusters to CCE +================================================ + +- :ref:`Solution Overview ` +- :ref:`Planning Resources for the Target Cluster ` +- :ref:`Migrating Resources Outside a Cluster ` +- :ref:`Installing the Migration Tool ` +- :ref:`Migrating Resources in a Cluster ` +- :ref:`Updating Resources Accordingly ` +- :ref:`Performing Additional Tasks ` +- :ref:`Troubleshooting ` + +.. toctree:: + :maxdepth: 1 + :hidden: + + solution_overview + planning_resources_for_the_target_cluster + migrating_resources_outside_a_cluster + installing_the_migration_tool + migrating_resources_in_a_cluster + updating_resources_accordingly + performing_additional_tasks + troubleshooting diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/installing_the_migration_tool.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/installing_the_migration_tool.rst new file mode 100644 index 0000000..ea94f39 --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/installing_the_migration_tool.rst @@ -0,0 +1,198 @@ +:original_name: cce_bestpractice_0310.html + +.. _cce_bestpractice_0310: + +Installing the Migration Tool +============================= + +Velero is an open-source backup and migration tool for Kubernetes clusters. It integrates the persistent volume (PV) data backup capability of the Restic tool and can be used to back up Kubernetes resource objects (such as Deployments, jobs, Services, and ConfigMaps) in the source cluster. 
Data in the PV mounted to the pod is backed up and uploaded to the object storage. When a disaster occurs or migration is required, the target cluster can use Velero to obtain the corresponding backup data from OBS and restore cluster resources as required. + + According to :ref:`Migration Solution `, you need to prepare temporary object storage to store backup files before the migration. Velero supports OBS or `MinIO `__ as the object storage. OBS requires sufficient storage space for storing backup files. You can estimate the storage space based on your cluster scale and data volume. You are advised to use OBS for backup. For details about how to deploy Velero, see :ref:`Installing Velero `. + + Prerequisites + ------------- + + - The Kubernetes version of the source on-premises cluster must be 1.10 or later, and the cluster can use DNS and Internet services properly. + - If you use OBS to store backup files, you need to obtain the AK/SK of a user who has the right to operate OBS. For details about how to obtain the AK/SK, see `Access Keys `__. + - If you use MinIO to store backup files, bind an EIP to the server where MinIO is installed and enable the API and console ports of MinIO in the security group. + - The target CCE cluster has been created. + - The source cluster and target cluster must each have at least one idle node. It is recommended that the node specifications be 4 vCPUs and 8 GB memory or higher. + + Installing MinIO + ---------------- + + MinIO is an open-source, high-performance object storage tool compatible with the S3 API protocol. If MinIO is used to store backup files for cluster migration, you need a temporary server to deploy MinIO and provide services for external systems. If you use OBS to store backup files, skip this section and go to :ref:`Installing Velero `. + + MinIO can be installed in any of the following locations: + + - Temporary ECS outside the cluster + + If the MinIO server is installed outside the cluster, backup files will not be affected when a catastrophic fault occurs in the cluster. + + - Idle nodes in the cluster + + You can remotely log in to a node to install the MinIO server or install MinIO in a container. For details, see the official Velero documentation at https://velero.io/docs/v1.7/contributions/minio/#set-up-server. + + .. important:: + + If you install MinIO in a container, pay attention to the following: + + - The storage type in the YAML file provided by Velero is **emptyDir**. You are advised to change the storage type to **HostPath** or **Local**. Otherwise, backup files will be permanently lost after the container is restarted. + - Ensure that the MinIO service is accessible externally. Otherwise, backup files cannot be downloaded outside the cluster. You can change the Service type to NodePort or use other types of public network access Services. + + Regardless of which deployment method is used, the server where MinIO is installed must have sufficient storage space, an EIP must be bound to the server, and the MinIO service port must be enabled in the security group. Otherwise, backup files cannot be uploaded or downloaded. + + In this example, MinIO is installed on a temporary ECS outside the cluster. + +#. Download MinIO. + + .. code-block:: + + mkdir /opt/minio + mkdir /opt/miniodata + cd /opt/minio + wget https://dl.minio.io/server/minio/release/linux-amd64/minio + chmod +x minio + +#. .. _cce_bestpractice_0310__li126129251432: + + Set the username and password of MinIO.
+ + The username and password set using this method are temporary environment variables and must be reset after the service is restarted. Otherwise, the default root credential **minioadmin:minioadmin** will be used to create the service. + + .. code-block:: + + export MINIO_ROOT_USER=minio + export MINIO_ROOT_PASSWORD=minio123 + +#. Create a service. In the command, **/opt/miniodata/** indicates the local disk path for MinIO to store data. + + The default API port of MinIO is 9000, and the console port is randomly generated. You can use the **--console-address** parameter to specify a console port. + + .. code-block:: + + ./minio server /opt/miniodata/ --console-address ":30840" & + + .. note:: + + Enable the API and console ports in the firewall and security group on the server where MinIO is to be installed. Otherwise, access to the object bucket will fail. + +#. Use a browser to access http://{*EIP of the node where MinIO resides*}:30840. The MinIO console page is displayed. + +.. _cce_bestpractice_0310__section138392220432: + +Installing Velero +----------------- + +Go to the OBS console or MinIO console and create a bucket named **velero** to store backup files. You can custom the bucket name, which must be used when installing Velero. Otherwise, the bucket cannot be accessed and the backup fails. For details, see :ref:`4 `. + +.. important:: + + - Velero instances need to be installed and deployed in both the **source and target clusters**. The installation procedures are the same, which are used for backup and restoration, respectively. + - The master node of a CCE cluster does not provide a port for remote login. You can install Velero using kubectl. + - If there are a large number of resources to back up, you are advised to adjust the CPU and memory resources of Velero and Restic to 1 vCPU and 1 GB memory or higher. For details, see :ref:`Backup Tool Resources Are Insufficient `. + - The object storage bucket for storing backup files must be **empty**. + +Download the latest, stable binary file from https://github.com/vmware-tanzu/velero/releases. This section uses Velero 1.7.0 as an example. The installation process in the source cluster is the same as that in the target cluster. + +#. Download the binary file of Velero 1.7.0. + + .. code-block:: + + wget https://github.com/vmware-tanzu/velero/releases/download/v1.7.0/velero-v1.7.0-linux-amd64.tar.gz + +#. Install the Velero client. + + .. code-block:: + + tar -xvf velero-v1.7.0-linux-amd64.tar.gz + cp ./velero-v1.7.0-linux-amd64/velero /usr/local/bin + +#. .. _cce_bestpractice_0310__li197871715322: + + Create the access key file **credentials-velero** for the backup object storage. + + .. code-block:: + + vim credentials-velero + + Replace the AK/SK in the file based on the site requirements. If OBS is used, obtain the AK/SK by referring to . If MinIO is used, the AK and SK are the username and password created in :ref:`2 `. + + .. code-block:: + + [default] + aws_access_key_id = {AK} + aws_secret_access_key = {SK} + +#. .. _cce_bestpractice_0310__li1722825643415: + + Deploy the Velero server. Change the value of **--bucket** to the name of the created object storage bucket. In this example, the bucket name is **velero**. For more information about custom installation parameters, see `Customize Velero Install `__. + + .. 
code-block:: + + velero install \ + --provider aws \ + --plugins velero/velero-plugin-for-aws:v1.2.1 \ + --bucket velero \ + --secret-file ./credentials-velero \ + --use-restic \ + --use-volume-snapshots=false \ + --backup-location-config region=eu-de,s3ForcePathStyle="true",s3Url=http://obs.eu-de.otc.t-systems.com + + .. table:: **Table 1** Installation parameters of Velero + + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Parameter | Description | + +===================================+====================================================================================================================================================================================================================================================================+ + | --provider | Vendor who provides the plug-in. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | --plugins | API component compatible with AWS S3. Both OBS and MinIO support the S3 protocol. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | --bucket | Name of the object storage bucket for storing backup files. The bucket must be created in advance. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | --secret-file | Secret file for accessing the object storage, that is, the **credentials-velero** file created in :ref:`3 `. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | --use-restic | Whether to use Restic to support PV data backup. You are advised to enable this function. Otherwise, storage volume resources cannot be backed up. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | --use-volume-snapshots | Whether to create the VolumeSnapshotLocation object for PV snapshot, which requires support from the snapshot program. Set this parameter to **false**. 
| + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | --backup-location-config | OBS bucket configurations, including region, s3ForcePathStyle, and s3Url. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | region | Region to which object storage bucket belongs. | + | | | + | | - If OBS is used, set this parameter according to your region, for example, **eu-de**. | + | | - If MinIO is used, set this parameter to **minio**. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | s3ForcePathStyle | The value **true** indicates that the S3 file path format is used. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | s3Url | API access address of the object storage bucket. | + | | | + | | - If OBS is used, set this parameter to **http://obs.{region}.otc.t-systems.com** (*region* indicates the region where the object storage bucket is located). For example, if the region is eu-de, the parameter value is **http://obs.eu-de.otc.t-systems.com**. | + | | - If MinIO is used, set this parameter to **http://{EIP of the node where minio is located}:9000**. The value of this parameter is determined based on the IP address and port of the node where MinIO is installed. | + | | | + | | .. note:: | + | | | + | | - The access port in s3Url must be set to the API port of MinIO instead of the console port. The default API port of MinIO is 9000. | + | | - To access MinIO installed outside the cluster, you need to enter the public IP address of MinIO. | + +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +#. By default, a namespace named **velero** is created for the Velero instance. Run the following command to view the pod status: + + .. code-block:: + + $ kubectl get pod -n velero + NAME READY STATUS RESTARTS AGE + restic-rn29c 1/1 Running 0 16s + velero-c9ddd56-tkzpk 1/1 Running 0 16s + + .. note:: + + To prevent memory insufficiency during backup in the actual production environment, you are advised to change the CPU and memory allocated to Restic and Velero by referring to :ref:`Backup Tool Resources Are Insufficient `. + +#. Check the interconnection between Velero and the object storage and ensure that the status is **Available**. + + .. 
code-block:: + + $ velero backup-location get + NAME PROVIDER BUCKET/PREFIX PHASE LAST VALIDATED ACCESS MODE DEFAULT + default aws velero Available 2021-10-22 15:21:12 +0800 CST ReadWrite true diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/migrating_resources_in_a_cluster.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/migrating_resources_in_a_cluster.rst new file mode 100644 index 0000000..df1d142 --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/migrating_resources_in_a_cluster.rst @@ -0,0 +1,159 @@ +:original_name: cce_bestpractice_0311.html + +.. _cce_bestpractice_0311: + +Migrating Resources in a Cluster + ================================ + +Scenario + -------- + +WordPress is used as an example to describe how to migrate an application from an on-premises Kubernetes cluster to a CCE cluster. The WordPress application consists of the WordPress and MySQL components, which are containerized. The two components are bound to two local storage volumes of the Local type respectively and provide external access through the NodePort Service. + +Before the migration, use a browser to access the WordPress site, create a site named **Migrate to CCE**, and publish an article to verify the integrity of PV data after the migration. The article published in WordPress will be stored in the **wp_posts** table of the MySQL database. If the migration is successful, all contents in the database will be migrated to the new cluster. You can verify the PV data migration based on the migration result. + +Prerequisites + ------------- + +- Before the migration, clear the abnormal pod resources in the source cluster. If a pod is in the abnormal state and has a PVC mounted, the PVC will be in the pending state after the cluster is migrated. +- Ensure that the cluster on the CCE side does not have the same resources as the cluster to be migrated because Velero does not restore the same resources by default. +- To ensure that container images can be properly pulled after cluster migration, migrate the images to SWR. +- CCE does not support EVS disks of the **ReadWriteMany** type. If resources of this type exist in the source cluster, change the storage type to **ReadWriteOnce**. +- Velero integrates the Restic tool to back up and restore storage volumes. Currently, the storage volumes of the HostPath type are not supported. For details, see `Restic Restrictions `__. If you need to back up storage volumes of this type, replace the hostPath volumes with local volumes by referring to :ref:`Storage Volumes of the HostPath Type Cannot Be Backed Up `. If a backup task involves storage of the HostPath type, the storage volumes of this type will be automatically skipped and a warning message will be generated. This will not cause a backup failure. + +.. _cce_bestpractice_0311__section750718193288: + +Backing Up Applications in the Source Cluster + --------------------------------------------- + +#. .. _cce_bestpractice_0311__li686918502812: + + (Optional) If you need to back up the data of a specified storage volume in the pod, add an annotation to the pod. The annotation template is as follows: + + .. code-block:: + + kubectl -n <namespace> annotate pod/<pod-name> backup.velero.io/backup-volumes=<volume-name-1>,<volume-name-2>,... + + - **<namespace>**: namespace where the pod is located. + - **<pod-name>**: pod name. + - **<volume-name>**: name of the persistent volume mounted to the pod. You can run the **describe** statement to query the pod information. The **Volume** field indicates the names of all persistent volumes attached to the pod.
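   For example, you can check which volumes a pod mounts as follows (a minimal sketch, assuming kubectl is configured for the source cluster; replace the placeholders with your own values):

   .. code-block::

      kubectl describe pod <pod-name> -n <namespace> | grep -A 10 Volumes: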
Add annotations to the pods of WordPress and MySQL. The pod names are **wordpress-758fbf6fc7-s7fsr** and **mysql-5ffdfbc498-c45lh**. As the pods are in the default namespace **default**, the **-n** parameter can be omitted. + + .. code-block:: + + kubectl annotate pod/wordpress-758fbf6fc7-s7fsr backup.velero.io/backup-volumes=wp-storage + kubectl annotate pod/mysql-5ffdfbc498-c45lh backup.velero.io/backup-volumes=mysql-storage + +#. Back up the application. During the backup, you can specify resources based on parameters. If no parameter is added, the entire cluster resources are backed up by default. For details about the parameters, see `Resource filtering `__. + + - **--default-volumes-to-restic**: indicates that the Restic tool is used to back up all storage volumes mounted to the pod. Storage volumes of the HostPath type are not supported. If this parameter is not specified, the storage volume specified by annotation in :ref:`1 ` is backed up by default. This parameter is available only when **--use-restic** is specified during :ref:`Velero installation `. + + .. code-block:: + + velero backup create <backup-name> --default-volumes-to-restic + + - **--include-namespaces**: backs up resources in a specified namespace. + + .. code-block:: + + velero backup create <backup-name> --include-namespaces <namespace> + + - **--include-resources**: backs up the specified resources. + + .. code-block:: + + velero backup create <backup-name> --include-resources deployments + + - **--selector**: backs up resources that match the selector. + + .. code-block:: + + velero backup create <backup-name> --selector <key>=<value> + + In this section, resources in the namespace **default** are backed up. **wordpress-backup** is the backup name. You need to specify the same backup name when restoring applications. Example: + + .. code-block:: + + velero backup create wordpress-backup --include-namespaces default --default-volumes-to-restic + + If the following information is displayed, the backup task is successfully created: + + .. code-block:: + + Backup request "wordpress-backup" submitted successfully. Run `velero backup describe wordpress-backup` or `velero backup logs wordpress-backup` for more details. + +#. Check the backup status. + + .. code-block:: + + velero backup get + + Information similar to the following is displayed: + + .. code-block:: + + NAME STATUS ERRORS WARNINGS CREATED EXPIRES STORAGE LOCATION SELECTOR + wordpress-backup Completed 0 0 2021-10-14 15:32:07 +0800 CST 29d default + + In addition, you can go to the object bucket to view the backup files. The **backups** path is the application resource backup path, and the **restic** path is the PV data backup path. + + |image1| + +.. _cce_bestpractice_0311__section482103142819: + +Restoring Applications in the Target Cluster + -------------------------------------------- + +The storage infrastructure of an on-premises cluster is different from that of a cloud cluster. After the cluster is migrated, PVs cannot be mounted to pods. Therefore, during the migration, you need to update the storage class of the target cluster to shield the differences in the underlying storage interfaces between the two clusters, so that workloads can request storage resources of the corresponding type when they are created. For details, see :ref:`Updating the Storage Class `. + +#. Use kubectl to connect to the CCE cluster. Create a storage class with the same name as that of the source cluster.
+ + In this example, the storage class name of the source cluster is **local** and the storage type is local disk. Local disks completely depend on the node availability. The data DR performance is poor. When the node is unavailable, the existing storage data is affected. Therefore, EVS volumes are used as storage resources in CCE clusters, and SAS disks are used as backend storage media. + + .. note:: + + - When an application containing PV data is restored in a CCE cluster, the defined storage class dynamically creates and mounts storage resources (such as EVS volumes) based on the PVC. + - The storage resources of the cluster can be changed as required, not limited to EVS volumes. To mount other types of storage, such as file storage and object storage, see :ref:`Updating the Storage Class `. + + YAML file of the migrated cluster: + + .. code-block:: + + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: local + provisioner: kubernetes.io/no-provisioner + volumeBindingMode: WaitForFirstConsumer + + The following is an example of the YAML file of the migration cluster: + + .. code-block:: + + allowVolumeExpansion: true + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: local + selfLink: /apis/storage.k8s.io/v1/storageclasses/csi-disk + parameters: + csi.storage.k8s.io/csi-driver-name: disk.csi.everest.io + csi.storage.k8s.io/fstype: ext4 + everest.io/disk-volume-type: SAS + everest.io/passthrough: "true" + provisioner: everest-csi-provisioner + reclaimPolicy: Delete + volumeBindingMode: Immediate + +#. Use the Velero tool to create a restore and specify a backup named **wordpress-backup** to restore the WordPress application to the CCE cluster. + + .. code-block:: + + velero restore create --from-backup wordpress-backup + + You can run the **velero restore get** statement to view the application restoration status. + +#. After the restoration is complete, check whether the application is running properly. If other adaptation problems may occur, rectify the fault by following the procedure described in :ref:`Updating Resources Accordingly `. + +.. |image1| image:: /_static/images/en-us_image_0000001171703840.png diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/migrating_resources_outside_a_cluster.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/migrating_resources_outside_a_cluster.rst new file mode 100644 index 0000000..e4403c0 --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/migrating_resources_outside_a_cluster.rst @@ -0,0 +1,57 @@ +:original_name: cce_bestpractice_0309.html + +.. _cce_bestpractice_0309: + +Migrating Resources Outside a Cluster +===================================== + +If your migration does not involve resources outside a cluster listed in :ref:`Table 1 ` or you do not need to use other services to update resources after the migration, skip this section. + +Migrating Container Images +-------------------------- + +To ensure that container images can be properly pulled after cluster migration and improve container deployment efficiency, you are advised to migrate private images to SoftWare Repository for Container (SWR). CCE works with SWR to provide a pipeline for automated container delivery. Images are pulled in parallel, which greatly improves container delivery efficiency. + +You need to manually migrate container images. + +#. 
Remotely log in to any node in the source cluster and run the **docker pull** command to pull all images to the local host. + +#. Log in to the SWR console, click **Login Command** in the upper right corner of the page, and copy the command. + +#. Run the copied login command on the node. + + The message "Login Succeeded" will be displayed upon a successful login. + +#. Add tags to all local images. + + .. code-block:: + + docker tag [Image name 1:tag 1] [Image repository address]/[Organization name]/[Image name 2:tag 2] + + - *[Image name 1*:*tag 1]*: name and tag of the local image to be pulled. + - *[Image repository address]*: You can query the image repository address on the SWR console. + - *[Organization name]*: Enter the name of the organization you created on the SWR console. + - *[Image name 2*:*tag 2]*: image name and tag displayed on the SWR console. + + **Example** + + .. code-block:: + + docker tag nginx:v1 swr.eu-de.otc.t-systems.com/cloud-develop/mynginx:v1 + +#. Run the **docker push** command to upload all local container image files to SWR. + + .. code-block:: + + docker push [Image repository address]/[Organization name]/[Image name 2:tag 2] + + **Example** + + .. code-block:: + + docker push swr.eu-de.otc.t-systems.com/cloud-develop/mynginx:v1 + +Migrating Databases and Storage (On-Demand) +------------------------------------------- + +You can determine whether to use **Relational Database Service (RDS)** and **Object Storage Service (OBS)** based on your production requirements. After the migration is complete, you need to reconfigure the database and storage for applications in the target CCE cluster. diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/performing_additional_tasks.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/performing_additional_tasks.rst new file mode 100644 index 0000000..57ce0ad --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/performing_additional_tasks.rst @@ -0,0 +1,34 @@ +:original_name: cce_bestpractice_0313.html + +.. _cce_bestpractice_0313: + +Performing Additional Tasks +=========================== + +Verifying Application Functions +------------------------------- + +Cluster migration involves full migration of application data, which may cause intra-application adaptation problems. In this example, after the cluster is migrated, the redirection link of the article published in WordPress is still the original domain name. If you click the article title, you will be redirected to the application in the source cluster. Therefore, you need to search for the original domain name in WordPress and replace it with the new domain name, change the values of **site_url** and primary URL in the database. For details, see `Changing The Site URL `__. + +Access the new address of the WordPress application. If the article published before the migration is displayed, the data of the persistent volume is successfully restored. + +|image1| + +Switching Live Traffic to the Target Cluster +-------------------------------------------- + +O&M personnel switch DNS to direct live traffic to the target cluster. + +- DNS traffic switching: Adjust the DNS configuration to switch traffic. +- Client traffic switching: Upgrade the client code or update the configuration to switch traffic. 
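
Before switching live traffic, you can quickly confirm that the restored application in the target CCE cluster is reachable at its new address. The following is a minimal sketch; replace the EIP and node port with the values of your own Service:

.. code-block::

   # Check that the restored workloads and Services are ready in the target cluster.
   kubectl get deploy,pod,svc -n default
   # Access the application at its new address.
   curl http://<EIP>:<NodePort>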
+ +Bringing the Source Cluster Offline +----------------------------------- + +After confirming that the service on the target cluster is normal, bring the source cluster offline and delete the backup files. + +- Verify that the service on the target cluster is running properly. +- Bring the source cluster offline. +- Delete backup files. + +.. |image1| image:: /_static/images/en-us_image_0000001217183707.png diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/planning_resources_for_the_target_cluster.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/planning_resources_for_the_target_cluster.rst new file mode 100644 index 0000000..2877969 --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/planning_resources_for_the_target_cluster.rst @@ -0,0 +1,48 @@ +:original_name: cce_bestpractice_0308.html + +.. _cce_bestpractice_0308: + +Planning Resources for the Target Cluster +========================================= + +CCE allows you to customize cluster resources to meet various service requirements. :ref:`Table 1 ` lists the key performance parameters of a cluster and provides the planned values. You can set the parameters based on your service requirements. It is recommended that the performance configuration be the same as that of the source cluster. + +.. important:: + + After a cluster is created, the resource parameters marked with asterisks (``*``) in :ref:`Table 1 ` cannot be modified. + +.. _cce_bestpractice_0308__table1841815113913: + +.. table:: **Table 1** CCE cluster planning + + +-----------------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+ + | Resource | Key Performance Parameter | Description | Example Value | + +=================+===========================================================+==============================================================================================================================================================================================================================================================================================================================================================================================+=================================================================+ + | Cluster | **\***\ Cluster Type | - **CCE cluster**: supports heterogeneous nodes such as GPU. You can run your containers in a secure and stable container runtime environment based on a high-performance network model. | CCE cluster | + | | | - **CCE Turbo cluster**: runs on a cloud native infrastructure that features software-hardware synergy to support passthrough networking, high security and reliability, and intelligent scheduling. 
| | + +-----------------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+ + | | **\***\ Network Model | - **VPC network**: The container network uses VPC routing to integrate with the underlying network. This network model is applicable to performance-intensive scenarios. The maximum number of nodes allowed in a cluster depends on the route quota in a VPC network. | VPC network | + | | | - **Tunnel network**: The container network is an overlay tunnel network on top of a VPC network and uses the VXLAN technology. This network model is applicable when there is no high requirements on performance. | | + | | | - **Cloud Native Network 2.0**: The container network deeply integrates the elastic network interface (ENI) capability of VPC, uses the VPC CIDR block to allocate container addresses, and supports passthrough networking to containers through a load balancer. | | + +-----------------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+ + | | **\***\ Number of master nodes | - **3**: Three master nodes will be created to deliver better DR performance. If one master node is faulty, the cluster can still be available without affecting service functions. | 3 | + | | | - **1**: A single master node will be created. This mode is not recommended in commercial scenarios. | | + +-----------------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+ + | Node | OS | - EulerOS | EulerOS | + | | | - CentOS | | + +-----------------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+ + | | Node Specifications (vary depending on the actual region) | - **General-purpose**: provides a balance of computing, memory, and network resources. It is a good choice for many applications. 
General-purpose nodes can be used for web servers, workload development, workload testing, and small-scale databases. | General-purpose (node specifications: 4 vCPUs and 8 GiB memory) | + | | | - **Memory-optimized**: provides higher memory capacity than general-purpose nodes and is suitable for relational databases, NoSQL, and other workloads that are both memory-intensive and data-intensive. | | + | | | - **General computing-basic**: provides a balance of computing, memory, and network resources and uses the vCPU credit mechanism to ensure baseline computing performance. Nodes of this type are suitable for applications requiring burstable high performance, such as light-load web servers, enterprise R&D and testing environments, and low- and medium-performance databases. | | + | | | - **GPU-accelerated**: provides powerful floating-point computing and is suitable for real-time, highly concurrent massive computing. Graphical processing units (GPUs) of P series are suitable for deep learning, scientific computing, and CAE. GPUs of G series are suitable for 3D animation rendering and CAD. GPU-accelerated nodes can be added only to clusters of v1.11 or later. | | + | | | - **Disk-intensive**: supports local disk storage and provides high networking performance. It is designed for workloads requiring high throughput and data switching, such as big data workloads. | | + +-----------------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+ + | | System Disk | - **Common I/O**: The backend storage media is SATA disks. | High I/O | + | | | - **High I/O**: The backend storage media is SAS disks. | | + | | | - **Ultra-high I/O**: The backend storage media is SSD disks. | | + +-----------------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+ + | | Storage Type | - **EVS volumes**: Mount an EVS volume to a container path. When containers are migrated, the attached EVS volumes are migrated accordingly. This storage mode is suitable for data that needs to be permanently stored. | EVS volumes | + | | | - **SFS volumes**: Create SFS volumes and mount them to a container path. The file system volumes created by the underlying SFS service can also be used. SFS volumes are applicable to persistent storage for frequent read/write in multiple workload scenarios, including media processing, content management, big data analysis, and workload analysis. | | + | | | - **OBS volumes**: Create OBS volumes and mount them to a container path. OBS volumes are applicable to scenarios such as cloud workload, data analysis, content analysis, and hotspot objects. 
| | + | | | - **SFS Turbo volumes**: Create SFS Turbo volumes and mount them to a container path. SFS Turbo volumes are fast, on-demand, and scalable, which makes them suitable for DevOps, containerized microservices, and enterprise office applications. | | + +-----------------+-----------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+ diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/solution_overview.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/solution_overview.rst new file mode 100644 index 0000000..eb667e6 --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/solution_overview.rst @@ -0,0 +1,109 @@ +:original_name: cce_bestpractice_0307.html + +.. _cce_bestpractice_0307: + +Solution Overview +================= + +Scenario +-------- + +Containers are growing in popularity and Kubernetes simplifies containerized deployment. Many companies choose to build their own Kubernetes clusters. However, the O&M workload of on-premises clusters is heavy, and O&M personnel need to configure the management systems and monitoring solutions by themselves. This increases the labor costs while decreasing the efficiency. + +In terms of performance, an on-premises cluster has poor scalability due to its fixed specifications. Auto scaling cannot be implemented in case of traffic surges, which may easily result in the insufficient or waste of cluster resources. In addition, an on-premises cluster is usually deployed on a single node without considering disaster recovery risks. Once a fault occurs, the entire cluster cannot be used, which may cause serious production incident. + +Now you can address the preceding challenges by using CCE, a service that allows easy cluster management and flexible scaling, integrated with application service mesh and Helm charts to simplify cluster O&M and reduce operations costs. CCE is easy to use and delivers high performance, security, reliability, openness, and compatibility. This section describes the solution and procedure for migrating on-premises clusters to CCE. + +.. _cce_bestpractice_0307__section96147345128: + +Migration Solution +------------------ + +This section describes a cluster migration solution, which applies to the following types of clusters: + +- Kubernetes clusters built in local IDCs +- On-premises clusters built using multiple ECSs +- Cluster services provided by other cloud service providers + +Before the migration, you need to analyze all resources in the source clusters and then determine the migration solution. Resources that can be migrated include resources inside and outside the clusters, as listed in the following table. + +.. _cce_bestpractice_0307__table1126932541820: + +.. 
table:: **Table 1** Resources that can be migrated + + +-----------------------------+----------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Category | Migration Object | Remarks | + +=============================+========================================================================================+================================================================================================================================================================================================================================================================================================================================================+ + | Resources inside a cluster | All objects in a cluster, including pods, jobs, Services, Deployments, and ConfigMaps. | You are not advised to migrate the resources in the **velero** and **kube-system** namespaces. | + | | | | + | | | - **velero**: Resources in this namespace are created by the migration tool and do not need to be migrated. | + | | | - **kube-system**: Resources in this namespace are system resources. If this namespace of the source cluster contains resources created by users, migrate the resources on demand. | + | | | | + | | | .. caution:: | + | | | | + | | | CAUTION: | + | | | If you are migrating or backing up cluster resources in CCE, for example, from a namespace to another, do not back up Secret **paas.elb**. It is because secret **paas.elb** is periodically updated. After the backup is complete, the secret may become invalid when it is restored. As a result, network storage functions are affected. | + +-----------------------------+----------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | PersistentVolumes (PVs) mounted to containers | Due to restrictions of the Restic tool, migration is not supported for the hostPath storage volume. For details about how to solve the problem, see :ref:`Storage Volumes of the HostPath Type Cannot Be Backed Up `. | + +-----------------------------+----------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Resources outside a cluster | On-premises image repository | Resources can be migrated to SoftWare Repository for Container (SWR). 
| + +-----------------------------+----------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | Non-containerized database | Resources can be migrated to Relational Database Service (RDS). | + +-----------------------------+----------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | | Non-local storage, such as object storage | Resources can be migrated to Object Storage Service (OBS). | + +-----------------------------+----------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +:ref:`Figure 1 ` shows the migration process. You can migrate resources outside a cluster as required. + +.. _cce_bestpractice_0307__fig203631140201419: + +.. figure:: /_static/images/en-us_image_0000001172392670.png + :alt: **Figure 1** Migration solution diagram + + **Figure 1** Migration solution diagram + +Migration Process +----------------- + +|image1| + +The cluster migration process is as follows: + +#. **Plan resources for the target cluster.** + + For details about the differences between CCE clusters and on-premises clusters, see **Key Performance Parameter** in :ref:`Planning Resources for the Target Cluster `. Plan resources as required and ensure that the performance configuration of the target cluster is the same as that of the source cluster. + +#. **Migrate resources outside a cluster.** + + If you need to migrate resources outside the cluster, see :ref:`Migrating Resources Outside a Cluster `. + +#. **Install the migration tool.** + + After resources outside a cluster are migrated, you can use a migration tool to back up and restore application configurations in the source and target clusters. For details about how to install the tool, see :ref:`Installing the Migration Tool `. + +#. **Migrate resources in the cluster.** + + Use Velero to back up resources in the source cluster to OBS and restore the resources in the target cluster. For details, see :ref:`Migrating Resources in a Cluster `. + + - :ref:`Backing Up Applications in the Source Cluster ` + + To back up resources, use the Velero tool to create a backup object in the original cluster, query and back up cluster data and resources, package the data, and upload the package to the object storage that is compatible with the S3 protocol. Cluster resources are stored in the JSON format. 
+ + - :ref:`Restoring Applications in the Target Cluster ` + + During restoration in the target cluster, Velero specifies the temporary object bucket that stores the backup data, downloads the backup data to the new cluster, and redeploys resources based on the JSON file. + +#. **Update resources accordingly.** + + After the migration, cluster resources may fail to be deployed. You need to update the faulty resources. The possible adaptation problems are as follows: + + - :ref:`Updating Images ` + - :ref:`Updating Services ` + - :ref:`Updating the Storage Class ` + - :ref:`Updating Databases ` + +#. **Perform additional tasks.** + + After cluster resources are properly deployed, verify application functions after the migration and switch service traffic to the target cluster. After confirming that all services are running properly, bring the source cluster offline. + +.. |image1| image:: /_static/images/en-us_image_0000001218074121.png diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/troubleshooting.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/troubleshooting.rst new file mode 100644 index 0000000..864f9c6 --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/troubleshooting.rst @@ -0,0 +1,118 @@ +:original_name: cce_bestpractice_0314.html + +.. _cce_bestpractice_0314: + +Troubleshooting +=============== + +.. _cce_bestpractice_0314__section11197194820367: + +Storage Volumes of the HostPath Type Cannot Be Backed Up +-------------------------------------------------------- + +Both HostPath and Local volumes are local storage volumes. However, the Restic tool integrated in Velero cannot back up the PVs of the HostPath type and supports only the Local type. Therefore, you need to replace the storage volumes of the HostPath type with the Local type in the source cluster. + +.. note:: + + It is recommended that Local volumes be used in Kubernetes v1.10 or later and can only be statically created. For details, see `local `__. + +#. Create a storage class for the Local volume. + + Example YAML: + + .. code-block:: + + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: local + provisioner: kubernetes.io/no-provisioner + volumeBindingMode: WaitForFirstConsumer + +#. Change the **hostPath** field to the **local** field, specify the original local disk path of the host machine, and add the **nodeAffinity** field. + + Example YAML: + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolume + metadata: + name: mysql-pv + labels: + app: mysql + spec: + accessModes: + - ReadWriteOnce + capacity: + storage: 5Gi + storageClassName: local # Storage class created in the previous step + persistentVolumeReclaimPolicy: Delete + local: + path: "/mnt/data" # Path of the attached local disk + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: Exists + +#. Run the following commands to verify the creation result: + + .. code-block:: + + kubectl get pv + + Information similar to the following is displayed: + + .. code-block:: + + NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE + mysql-pv 5Gi RWO Delete Available local 3s + +.. 
_cce_bestpractice_0314__section321054511332: + +Backup Tool Resources Are Insufficient +-------------------------------------- + +In the production environment, if there are many backup resources, for example, the default resource size of the backup tool is used, the resources may be insufficient. In this case, perform the following steps to adjust the CPU and memory size allocated to the Velero and Restic: + +**Before installing Velero:** + +You can specify the size of resources used by Velero and Restic when :ref:`installing Velero `. + +The following is an example of installation parameters: + +.. code-block:: + + velero install \ + --velero-pod-cpu-request 500m \ + --velero-pod-mem-request 1Gi \ + --velero-pod-cpu-limit 1000m \ + --velero-pod-mem-limit 1Gi \ + --use-restic \ + --restic-pod-cpu-request 500m \ + --restic-pod-mem-request 1Gi \ + --restic-pod-cpu-limit 1000m \ + --restic-pod-mem-limit 1Gi + +**After Velero is installed:** + +#. Edit the YAML files of the Velero and Restic workloads in the **velero** namespace. + + .. code-block:: + + kubectl edit deploy velero -n velero + kubectl edit deploy restic -n velero + +#. Modify the resource size under the **resources** field. The modification is the same for the Velero and Restic workloads, as shown in the following: + + .. code-block:: + + resources: + limits: + cpu: "1" + memory: 1Gi + requests: + cpu: 500m + memory: 1Gi diff --git a/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/updating_resources_accordingly.rst b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/updating_resources_accordingly.rst new file mode 100644 index 0000000..2acb129 --- /dev/null +++ b/umn/source/best_practice/migration/migrating_on-premises_kubernetes_clusters_to_cce/updating_resources_accordingly.rst @@ -0,0 +1,203 @@ +:original_name: cce_bestpractice_0312.html + +.. _cce_bestpractice_0312: + +Updating Resources Accordingly +============================== + +.. _cce_bestpractice_0312__section7125750134820: + +Updating Images +--------------- + +The WordPress and MySQL images used in this example can be pulled from SWR. Therefore, the image pull failure (ErrImagePull) will not occur. If the application to be migrated is created from a private image, perform the following steps to update the image: + +#. Migrate the image resources to SWR. For details, see `Uploading an Image Through a Container Engine Client `__. + +#. Log in to the SWR console and obtain the image path used after the migration. + + The image path is in the following format: + + .. code-block:: + + 'swr.{Region}.otc.t-systems.com/{Organization name}/{Image name}:{Tag} + +#. Run the following command to modify the workload and replace the **image** field in the YAML file with the image path: + + .. code-block:: + + kubectl edit deploy wordpress + +#. Check the running status of the workload. + +.. _cce_bestpractice_0312__section41282507482: + +Updating Services +----------------- + +After the cluster is migrated, the Service of the source cluster may fail to take effect. You can perform the following steps to update the Service. If ingresses are configured in the source cluster, you need to connect the new cluster to ELB again after the migration. For details, see `Using kubectl to Create an ELB Ingress `__. + +#. Connect to the cluster using kubectl. + +#. Edit the YAML file of the corresponding Service to change the Service type and port number. + + .. 
code-block:: + + kubectl edit svc wordpress + + To update load balancer resources, you need to connect to ELB again. Add the annotations by following the procedure described in `LoadBalancer `__. + + .. code-block:: + + annotations: + kubernetes.io/elb.class: union # Shared load balancer + kubernetes.io/elb.id: 9d06a39d-xxxx-xxxx-xxxx-c204397498a3 # Load balancer ID, which can be queried on the ELB console. + kubernetes.io/elb.subnet-id: f86ba71c-xxxx-xxxx-xxxx-39c8a7d4bb36 # ID of the cluster where the subnet resides + kubernetes.io/session-affinity-mode: SOURCE_IP # Enable the sticky session based on the source IP address. + +#. Use a browser to check whether the Service is available. + +.. _cce_bestpractice_0312__section746195321414: + +Updating the Storage Class +-------------------------- + +As the storage infrastructures of clusters may be different, storage volumes cannot be mounted to the target cluster. You can use either of the following methods to update the volumes: + +.. important:: + + Both update methods can be performed only before the application is restored in the target cluster. Otherwise, PV data resources may fail to be restored. In this case, use the Velero to restore applications after the storage class update is complete. For details, see :ref:`Restoring Applications in the Target Cluster `. + +**Method 1: Creating a ConfigMap mapping** + +#. Create a ConfigMap in the CCE cluster and map the storage class used by the source cluster to the default storage class of the CCE cluster. + + .. code-block:: + + apiVersion: v1 + kind: ConfigMap + metadata: + name: change-storageclass-plugin-config + namespace: velero + labels: + app.kubernetes.io/name: velero + velero.io/plugin-config: "true" + velero.io/change-storage-class: RestoreItemAction + data: + {Storage class name01 in the source cluster}: {Storage class name01 in the target cluster} + {Storage class name02 in the source cluster}: {Storage class name02 in the target cluster} + +#. Run the following command to apply the ConfigMap configuration: + + .. code-block:: + + $ kubectl create -f change-storage-class.yaml + configmap/change-storageclass-plugin-config created + +**Method 2: Creating a storage class with the same name** + +#. Run the following command to query the default storage class supported by CCE: + + .. code-block:: + + kubectl get sc + + Information similar to the following is displayed: + + .. code-block:: + + NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE + csi-disk everest-csi-provisioner Delete Immediate true 3d23h + csi-disk-topology everest-csi-provisioner Delete WaitForFirstConsumer true 3d23h + csi-nas everest-csi-provisioner Delete Immediate true 3d23h + csi-obs everest-csi-provisioner Delete Immediate false 3d23h + csi-sfsturbo everest-csi-provisioner Delete Immediate true 3d23h + + .. table:: **Table 1** Storage classes + + ================= ======================== + Storage Class Storage Resource + ================= ======================== + csi-disk EVS + csi-disk-topology EVS with delayed binding + csi-nas SFS + csi-obs OBS + csi-sfsturbo SFS Turbo + ================= ======================== + +#. Run the following command to export the required storage class details in YAML format: + + .. code-block:: + + kubectl get sc -o=yaml + +#. Copy the YAML file and create a new storage class. + + Change the storage class name to the name used in the source cluster to call basic storage resources of the cloud. + + The YAML file of csi-obs is used as an example. 
Delete the unnecessary information in italic under the **metadata** field and modify the information in bold. You are advised not to modify other parameters. + + .. code-block:: + + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + creationTimestamp: "2021-10-18T06:41:36Z" + name: # Use the name of the storage class used in the source cluster. + resourceVersion: "747" + selfLink: /apis/storage.k8s.io/v1/storageclasses/csi-obs + uid: 4dbbe557-ddd1-4ce8-bb7b-7fa15459aac7 + parameters: + csi.storage.k8s.io/csi-driver-name: obs.csi.everest.io + csi.storage.k8s.io/fstype: obsfs + everest.io/obs-volume-type: STANDARD + provisioner: everest-csi-provisioner + reclaimPolicy: Delete + volumeBindingMode: Immediate + + .. note:: + + - SFS Turbo file systems cannot be directly created using StorageClass. You need to go to the SFS Turbo console to create SFS Turbo file systems that belong to the same VPC subnet and have inbound ports (111, 445, 2049, 2051, 2052, and 20048) enabled in the security group. + - CCE does not support EVS disks of the ReadWriteMany type. If resources of this type exist in the source cluster, change the storage type to **ReadWriteOnce**. + +#. Restore the cluster application by referring to :ref:`Restoring Applications in the Target Cluster ` and check whether the PVC is successfully created. + + .. code-block:: + + kubectl get pvc + + In the command output, the **VOLUME** column indicates the name of the PV automatically created using the storage class. + + .. code-block:: + + NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE + pvc Bound pvc-4c8e655a-1dbc-4897-ae6c-446b502f5e77 5Gi RWX local 13s + +.. _cce_bestpractice_0312__section728213614323: + +Updating Databases +------------------ + +In this example, the database is a local MySQL database and does not need to be reconfigured after the migration. + +.. note:: + + - If the RDS instance is in the same VPC as the CCE cluster, it can be accessed using the private IP address. Otherwise, it can only be accessed only through public networks by binding an EIP. You are advised to use the private network access mode for high security and good RDS performance. + - Ensure that the inbound rule of the security group to which RDS belongs has been enabled for the cluster. Otherwise, the connection will fail. + +#. Log in to the RDS console and obtain the private IP address and port number of the DB instance on the **Basic Information** page. + +#. Run the following command to modify the WordPress workload: + + .. code-block:: + + kubectl edit deploy wordpress + + Set the environment variables in the **env** field. + + - **WORDPRESS_DB_HOST**: address and port number used for accessing the database, that is, the internal network address and port number obtained in the previous step. + - **WORDPRESS_DB_USERU**: username for accessing the database. + - **WORDPRESS_DB_PASSWORD**: password for accessing the database. + - **WORDPRESS_DB_NAME**: name of the database to be connected. + +#. Check whether the RDS database is properly connected. diff --git a/umn/source/best_practice/networking/implementing_sticky_session_through_load_balancing.rst b/umn/source/best_practice/networking/implementing_sticky_session_through_load_balancing.rst new file mode 100644 index 0000000..4196a6b --- /dev/null +++ b/umn/source/best_practice/networking/implementing_sticky_session_through_load_balancing.rst @@ -0,0 +1,189 @@ +:original_name: cce_bestpractice_00231.html + +.. 
_cce_bestpractice_00231: + +Implementing Sticky Session Through Load Balancing +================================================== + +Concepts +-------- + +Session persistence is one of the most common while complex problems in load balancing. + +Session persistence is also called sticky sessions. After the sticky session function is enabled, requests from the same client are distributed to the same backend ECS by the load balancer for better continuity. + +In load balancing and sticky session, connection and session are two key concepts. When only load balancing is concerned, session and connection refer to the same thing. + +Simply put, if a user needs to log in, it can be regarded as a session; otherwise, a connection. + +The sticky session mechanism fundamentally conflicts with the basic functions of load balancing. A load balancer forwards requests from clients to multiple backend servers to avoid overload on a single server. However, sticky session requires that some requests be forwarded to the same server for processing. Therefore, you need to select a proper sticky session mechanism based on the application environment. + +Layer-4 Load Balancing (Service) +-------------------------------- + +In layer-4 load balancing, source IP address-based sticky session (Hash routing based on the client IP address) can be enabled. To enable source IP address-based sticky session on Services, the following conditions must be met: + +#. **Service Affinity** of the Service is set to **Node level** (that is, the value of the **externalTrafficPolicy** field of the Service is **Local**). + +#. Enable the source IP address-based sticky session in the load balancing configuration of the Service. + + .. code-block:: + + apiVersion: v1 + kind: Service + metadata: + name: svc-example + namespace: default + annotations: + kubernetes.io/elb.class: union + kubernetes.io/elb.id: 56dcc1b4-8810-480c-940a-a44f7736f0dc + kubernetes.io/elb.lb-algorithm: ROUND_ROBIN + kubernetes.io/elb.session-affinity-mode: SOURCE_IP + spec: + selector: + app: nginx + externalTrafficPolicy: Local + ports: + - name: cce-service-0 + targetPort: 80 + nodePort: 32633 + port: 80 + protocol: TCP + type: LoadBalancer + +#. Anti-affinity is enabled for the backend application corresponding to the Service. + +Layer-7 Load Balancing (Ingress) +-------------------------------- + +In layer-7 load balancing, sticky session based on HTTP cookies and app cookies can be enabled. To enable such sticky session, the following conditions must be met: + +#. The application (workload) corresponding to the ingress is enabled with workload anti-affinity. +#. Node affinity is enabled for the Service corresponding to the ingress. + +**Procedure** + +#. Create a Nginx workload. + + Set the number of pods to 3 and set the podAntiAffinity. + + .. code-block:: + + kind: Deployment + apiVersion: apps/v1 + metadata: + name: nginx + namespace: default + spec: + replicas: 3 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: container-0 + image: 'nginx:perl' + resources: + limits: + cpu: 250m + memory: 512Mi + requests: + cpu: 250m + memory: 512Mi + imagePullSecrets: + - name: default-secret + affinity: + podAntiAffinity: # Pod anti-affinity. + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - nginx + topologyKey: kubernetes.io/hostname + +#. Creating a NodePort Service + + Configure the sticky session in a Service. 
An ingress can connect to multiple Services, and each Service can have different sticky sessions. + + .. code-block:: + + apiVersion: v1 + kind: Service + metadata: + name: nginx + namespace: default + annotations: + kubernetes.io/elb.lb-algorithm: ROUND_ROBIN + kubernetes.io/elb.session-affinity-mode: HTTP_COOKIE # HTTP cookie type. + kubernetes.io/elb.session-affinity-option: '{"persistence_timeout":"1440"}' # Session stickiness duration, in minutes. The value ranges from 1 to 1440. + spec: + selector: + app: nginx + ports: + - name: cce-service-0 + protocol: TCP + port: 80 + targetPort: 80 + nodePort: 32633 # Node port number. + type: NodePort + externalTrafficPolicy: Local # Node-level forwarding. + + You can also select **APP_COOKIE**. + + .. code-block:: + + apiVersion: v1 + kind: Service + metadata: + name: nginx + namespace: default + annotations: + kubernetes.io/elb.lb-algorithm: ROUND_ROBIN + kubernetes.io/elb.session-affinity-mode: APP_COOKIE # Select APP_COOKIE. + kubernetes.io/elb.session-affinity-option: '{"app_cookie_name":"test"}' # Application cookie name. + ... + +#. Create an ingress and associate it with a Service. The following example describes how to automatically create a shared load balancer. For details about how to specify other types of load balancers, see `Using kubectl to Create an ELB Ingress `__. + + .. code-block:: + + apiVersion: networking.k8s.io/v1 + kind: Ingress + metadata: + name: ingress-test + namespace: default + annotations: + kubernetes.io/elb.class: union + kubernetes.io/elb.port: '80' + kubernetes.io/elb.autocreate: + '{ + "type":"public", + "bandwidth_name":"cce-bandwidth-test", + "bandwidth_chargemode":"traffic", + "bandwidth_size":1, + "bandwidth_sharetype":"PER", + "eip_type":"5_bgp" + }' + spec: + rules: + - host: 'www.example.com' + http: + paths: + - path: '/' + backend: + service: + name: nginx #Service name + port: + number: 80 + property: + ingress.beta.kubernetes.io/url-match-mode: STARTS_WITH + pathType: ImplementationSpecific + ingressClassName: cce + +#. Log in to the ELB console, access the load balancer details page, and check whether the sticky session feature is enabled. diff --git a/umn/source/best_practice/networking/index.rst b/umn/source/best_practice/networking/index.rst new file mode 100644 index 0000000..a6560a3 --- /dev/null +++ b/umn/source/best_practice/networking/index.rst @@ -0,0 +1,20 @@ +:original_name: cce_bestpractice_0052.html + +.. _cce_bestpractice_0052: + +Networking +========== + +- :ref:`Planning CIDR Blocks for a Cluster ` +- :ref:`Selecting a Network Model ` +- :ref:`Implementing Sticky Session Through Load Balancing ` +- :ref:`Obtaining the Client Source IP Address for a Container ` + +.. toctree:: + :maxdepth: 1 + :hidden: + + planning_cidr_blocks_for_a_cluster + selecting_a_network_model + implementing_sticky_session_through_load_balancing + obtaining_the_client_source_ip_address_for_a_container diff --git a/umn/source/best_practice/networking/obtaining_the_client_source_ip_address_for_a_container.rst b/umn/source/best_practice/networking/obtaining_the_client_source_ip_address_for_a_container.rst new file mode 100644 index 0000000..8c90f70 --- /dev/null +++ b/umn/source/best_practice/networking/obtaining_the_client_source_ip_address_for_a_container.rst @@ -0,0 +1,78 @@ +:original_name: cce_bestpractice_00035.html + +.. 
_cce_bestpractice_00035: + +Obtaining the Client Source IP Address for a Container +====================================================== + +Background +---------- + +There may be different types of proxy servers between a client and a container server. How can a container obtain the real source IP address of the client? This section describes several scenarios you may encounter. + +Principles +---------- + +|image1| + +**Layer-7 forwarding:** + +Ingress: If this access mode is used, the client source IP address is saved in the **X-Forwarded-For** HTTP header field by default. No other configuration is required. + +- ELB ingress: A self-developed ingress to implement layer-7 network access between the internet and intranet (in the same VPC) based on ELB. If the backend Service type is **NodePort**, set **Service Affinity** to **Node level**. + +**Layer-4 forwarding:** + +- LoadBalancer: Use ELB to achieve load balancing. You can manually enable the **Obtain Client IP Address** option for TCP and UDP listeners of shared load balancers. By default, the **Obtain Client IP Address** option is enabled for TCP and UDP listeners of dedicated load balancers. You do not need to manually enable it. +- NodePort: In this access mode, the container port is mapped to the node port. If cluster-level affinity is configured, access requests will be forwarded through the node and the client source IP address cannot be obtained. If node-level affinity is configured, access requests are not forwarded and the client source IP address can be obtained. + +Ingress +------- + +Configure the application server and obtain the IP address of a client from the HTTP header. + +The real IP address is placed in the **X-Forwarded-For** HTTP header field by the load balancer in the following format: + +.. code-block:: + + X-Forwarded-For: IP address of the client,Proxy server 1-IP address,Proxy server 2-IP address,... + +If you use this method, the first IP address obtained is the IP address of the client. + +For details, see `How Can I Obtain the IP Address of a Client? `__ + +.. note:: + + - When adding an ingress, if the backend service is of the NodePort type, set **Service Affinity** to **Node level**, that is, set **spec.externalTrafficPolicy** to **Local**. For details, see :ref:`NodePort `. + +LoadBalancer +------------ + +For a LoadBalancer Service, different types of clusters obtain source IP addresses in different scenarios. In some scenarios, source IP addresses cannot be obtained currently. + +**VPC and Container Tunnel Network Models** + +To obtain source IP addresses, perform the following steps: + +#. When creating a LoadBalancer Service on the CCE console, set **Service Affinity** to **Node level** instead of **Cluster level**. +#. Go to the ELB console and enable the function of obtaining the client IP address of the listener corresponding to the load balancer. **Transparent transmission of source IP addresses is enabled for dedicated load balancers by default. You do not need to manually enable this function.** + + a. Log in to the ELB console. + b. Click |image2| in the upper left corner to select the desired region and project. + c. Click **Service List**. Under **Networking**, click **Elastic Load Balance**. + d. On the **Load Balancers** page, click the name of the load balancer. + e. Click **Listeners**. + f. To add a listener, click **Add Listener**. + g. To modify a listener, locate the listener and click |image3| on the right of its name. + h. Enable **Obtain Client IP Address**. + +.. 
_cce_bestpractice_00035__section6340152911914: + +NodePort +-------- + +Set the service affinity of a NodePort Service to **Node level** instead of **Cluster level**. That is, set **spec.externalTrafficPolicy** of the Service to **Local**. + +.. |image1| image:: /_static/images/en-us_image_0000001176818150.png +.. |image2| image:: /_static/images/en-us_image_0000001221501677.png +.. |image3| image:: /_static/images/en-us_image_0000001221820189.png diff --git a/umn/source/best_practice/networking/planning_cidr_blocks_for_a_cluster.rst b/umn/source/best_practice/networking/planning_cidr_blocks_for_a_cluster.rst new file mode 100644 index 0000000..77090e8 --- /dev/null +++ b/umn/source/best_practice/networking/planning_cidr_blocks_for_a_cluster.rst @@ -0,0 +1,180 @@ +:original_name: cce_bestpractice_00004.html + +.. _cce_bestpractice_00004: + +Planning CIDR Blocks for a Cluster +================================== + +Before creating a cluster on CCE, determine the number of VPCs, number of subnets, container CIDR blocks, and Services for access based on service requirements. + +This topic describes the addresses in a CCE cluster in a VPC and how to plan CIDR blocks. + +Notes and Constraints +--------------------- + +To access a CCE cluster through a VPN, ensure that the VPN does not conflict with the VPC CIDR block where the cluster resides and the container CIDR block. + +Basic Concepts +-------------- + +- **VPC CIDR Block** + + Virtual Private Cloud (VPC) enables you to provision logically isolated, configurable, and manageable virtual networks for cloud servers, cloud containers, and cloud databases. You have complete control over your virtual network, including selecting your own CIDR block, creating subnets, and configuring security groups. You can also assign EIPs and allocate bandwidth in your VPC for secure and easy access to your business system. + +- **Subnet CIDR Block** + + A subnet is a network that manages ECS network planes. It supports IP address management and DNS. The IP addresses of all ECSs in a subnet belong to the subnet. + + + .. figure:: /_static/images/en-us_image_0261818822.png + :alt: **Figure 1** VPC CIDR block architecture + + **Figure 1** VPC CIDR block architecture + + By default, ECSs in all subnets of the same VPC can communicate with one another, while ECSs in different VPCs cannot communicate with each other. + + You can create a peering connection on VPC to enable ECSs in different VPCs to communicate with each other. + +- **Container (Pod) CIDR Block** + + Pod is a Kubernetes concept. Each pod has an IP address. + + When creating a cluster on CCE, you can specify the pod (container) CIDR block, which cannot overlap with the subnet CIDR block. For example, if the subnet CIDR block is 192.168.0.0/16, the container CIDR block cannot be 192.168.0.0/18 or 192.168.1.0/18, because these addresses are included in 192.168.0.0/16. + +- **Container Subnet** (Only for CCE Turbo Clusters) + + In a CCE Turbo cluster, a container is assigned an IP address from the CIDR block of a VPC. The container subnet can overlap with the subnet CIDR block. Note that the subnet you select determines the maximum number of pods in the cluster. After a cluster is created, you can only add container subnets but cannot delete them. + +- **Service CIDR Block** + + Service is also a Kubernetes concept. Each Service has an address. When creating a cluster on CCE, you can specify the Service CIDR block. 
Similarly, the Service CIDR block cannot overlap with the subnet CIDR block or the container CIDR block. The Service CIDR block can be used only within a cluster. + +Single-VPC Single-Cluster Scenarios +----------------------------------- + +**CCE Clusters**: include clusters in VPC network model and container tunnel network model. :ref:`Figure 2 ` shows the CIDR block planning of a cluster. + +- VPC CIDR Block: specifies the VPC CIDR block where the cluster resides. The size of this CIDR block affects the maximum number of nodes that can be created in the cluster. +- Subnet CIDR Block: specifies the subnet CIDR block where the node in the cluster resides. The subnet CIDR block is included in the VPC CIDR block. Different nodes in the same cluster can be allocated to different subnet CIDR blocks. +- Container CIDR Block: cannot overlap with the subnet CIDR block. +- Service CIDR Block: cannot overlap with the subnet CIDR block or the container CIDR block. + +.. _cce_bestpractice_00004__en-us_topic_0099587154_fig15791152874920: + +.. figure:: /_static/images/en-us_image_0000001392318380.png + :alt: **Figure 2** Network CIDR block planning in the single-VPC single-cluster scenario (CCE cluster) + + **Figure 2** Network CIDR block planning in the single-VPC single-cluster scenario (CCE cluster) + +:ref:`Figure 3 ` shows the CIDR block planning for a **CCE Turbo cluster** (cloud native network 2.0). + +- VPC CIDR Block: specifies the VPC CIDR block where the cluster resides. The size of this CIDR block affects the maximum number of nodes that can be created in the cluster. +- Subnet CIDR Block: specifies the subnet CIDR block where the node in the cluster resides. The subnet CIDR block is included in the VPC CIDR block. Different nodes in the same cluster can be allocated to different subnet CIDR blocks. +- Container Subnet CIDR Block: The container subnet is included in the VPC CIDR block and can overlap with the subnet CIDR block or even be the same as the subnet CIDR block. Note that the container subnet size determines the maximum number of containers in the cluster because IP addresses in the VPC are directly allocated to containers. After a cluster is created, you can only add container subnets but cannot delete them. You are advised to set a larger IP address segment for the container subnet to prevent insufficient container IP addresses. +- Service CIDR Block: cannot overlap with the subnet CIDR block or the container CIDR block. + +.. _cce_bestpractice_00004__fig19746213285: + +.. figure:: /_static/images/en-us_image_0000001392280374.png + :alt: **Figure 3** CIDR block planning in the single-VPC single-cluster scenario (CCE Turbo cluster) + + **Figure 3** CIDR block planning in the single-VPC single-cluster scenario (CCE Turbo cluster) + +**Single-VPC Multi-Cluster Scenarios** +-------------------------------------- + +**VPC network model** + +Pod packets are forwarded through VPC routes. CCE automatically configures a routing table on the VPC routes to each container CIDR block. The network scale is limited by the VPC route table. :ref:`Figure 4 ` shows the CIDR block planning of the cluster. + +- VPC CIDR Block: specifies the VPC CIDR block where the cluster resides. The size of this CIDR block affects the maximum number of nodes that can be created in the cluster. +- Subnet CIDR Block: The subnet CIDR block in each cluster cannot overlap with the container CIDR block. 
+- Container CIDR Block: If multiple VPC network model clusters exist in a single VPC, the container CIDR blocks of all clusters cannot overlap because the clusters use the same routing table. In this case, CCE clusters are partially interconnected. A pod of a cluster can directly access the pods of another cluster, but cannot access the Services of the cluster. +- Service CIDR Block: can be used only in clusters. Therefore, the service CIDR blocks of different clusters can overlap, but cannot overlap with the subnet CIDR block and container CIDR block of the cluster to which the clusters belong. + +.. _cce_bestpractice_00004__en-us_topic_0099587154_fig69527530400: + +.. figure:: /_static/images/en-us_image_0261818824.png + :alt: **Figure 4** VPC network - multi-cluster scenario + + **Figure 4** VPC network - multi-cluster scenario + +**Tunnel Network** + +Though at some cost of performance, the tunnel encapsulation enables higher interoperability and compatibility with advanced features (such as network policy-based isolation), meeting the requirements of most applications. :ref:`Figure 5 ` shows the CIDR block planning of the cluster. + +- VPC CIDR Block: specifies the VPC CIDR block where the cluster resides. The size of this CIDR block affects the maximum number of nodes that can be created in the cluster. +- Subnet CIDR Block: The subnet CIDR block in each cluster cannot overlap with the container CIDR block. +- Container CIDR Block: The container CIDR blocks of all clusters can overlap. In this case, pods in different clusters cannot be directly accessed using IP addresses. It is recommended that ELB be used for the cross-cluster access between containers. +- Service CIDR Block: can be used only in clusters. Therefore, the service CIDR blocks of different clusters can overlap, but cannot overlap with the subnet CIDR block and container CIDR block of the cluster to which the clusters belong. + +.. _cce_bestpractice_00004__en-us_topic_0099587154_fig8672112184219: + +.. figure:: /_static/images/en-us_image_0261818885.png + :alt: **Figure 5** Tunnel network - multi-cluster scenario + + **Figure 5** Tunnel network - multi-cluster scenario + +**Cloud native network 2.0 network model** (CCE Turbo cluster) + +In this mode, container IP addresses are allocated from the VPC CIDR block. ELB passthrough networking is supported to direct access requests to containers. Security groups and multiple types of VPC networks can be bound to deliver high performance. + +- VPC CIDR Block: specifies the VPC CIDR block where the cluster resides. In a CCE Turbo cluster, the CIDR block size affects the total number of nodes and containers that can be created in the cluster. +- Subnet CIDR Block: There is no special restriction on the subnet CIDR blocks in CCE Turbo clusters. +- Container Subnet: The CIDR block of the container subnet is included in the VPC CIDR block. Container subnets in different clusters can overlap with each other or overlap with the subnet CIDR block. However, you are advised to stagger the container CIDR blocks of different clusters and ensure that the container subnet CIDR blocks have sufficient IP addresses. In this case, pods in different clusters can directly access each other through IP addresses. +- Service CIDR Block: can be used only in clusters. Therefore, the service CIDR blocks of different clusters can overlap, but cannot overlap with the subnet CIDR block or container CIDR block. + + +.. 
figure:: /_static/images/en-us_image_0000001392259910.png + :alt: **Figure 6** Cloud native network 2.0 network model - multi-cluster scenario + + **Figure 6** Cloud native network 2.0 network model - multi-cluster scenario + +**Coexistence of Clusters in Multi-Network** + +When a VPC contains clusters created with different network models, comply with the following rules when creating a cluster: + +- VPC CIDR Block: In this scenario, all clusters are located in the same VPC CIDR block. Ensure that there are sufficient available IP addresses in the VPC. +- Subnet CIDR Block: Ensure that the subnet CIDR block does not overlap with the container CIDR block. Even in some scenarios (for example, coexistence with CCE Turbo clusters), the subnet CIDR block can overlap with the container (subnet) CIDR block. However, this is not recommended. +- Container CIDR Block: Ensure that the container CIDR blocks of clusters in **VPC network model** do not overlap. +- Service CIDR Block: The service CIDR blocks of all clusters can overlap, but cannot overlap with the subnet CIDR block and container CIDR block of the cluster. + +Cross-VPC Cluster Interconnection +--------------------------------- + +When two VPC networks are interconnected, you can configure the packets to be sent to the peer VPC in the route table. + +In the VPC network model, after creating a peering connection, you need to add routes for the peering connection to enable communication between the two VPCs. + + +.. figure:: /_static/images/en-us_image_0261818886.png + :alt: **Figure 7** VPC Network - VPC interconnection scenario + + **Figure 7** VPC Network - VPC interconnection scenario + +When creating a VPC peering connection between containers across VPCs, pay attention to the following points: + +- The VPC to which the clusters belong must not overlap. In each cluster, the subnet CIDR block cannot overlap with the container CIDR block. +- The container CIDR blocks of clusters cannot overlap, but the Service CIDR blocks can. +- You need to add not only the peer VPC CIDR block but also the peer container CIDR block to the VPC routing tables at both ends. Note that this operation must be performed in the VPC route tables of the clusters. + +In the tunnel network model, after creating a peering connection, you need to add routes for the peering connection to enable communication between the two VPCs. + + +.. figure:: /_static/images/en-us_image_0000001082048529.png + :alt: **Figure 8** Tunnel network - VPC interconnection scenario + + **Figure 8** Tunnel network - VPC interconnection scenario + +Pay attention to the following: + +- The VPC of the clusters must not overlap. +- The container CIDR blocks of all clusters can overlap, so do the Service CIDR blocks. +- Add the peer subnet CIDR block to the route table of the VPC peering connection. + +In **Cloud Native Network 2.0** mode, after creating a VPC peering connection, you only need to add routes for the VPC peering connection to enable communication between the two VPCs. Ensure that the VPC of the clusters does not overlap. + +**VPC-IDC Scenarios** +--------------------- + +Similar to the VPC interconnection scenario, some CIDR blocks in the VPC are routed to the IDC. The pod IP addresses of CCE clusters cannot overlap with the addresses within these CIDR blocks. To access the pod IP addresses in the cluster in the IDC, you need to configure the route table to the private line VBR on the IDC. 
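+
+As an illustration of the constraint above, the following is a minimal sketch of a non-overlapping CIDR plan for a cluster whose VPC is connected to an IDC over a private line. All values are hypothetical examples and must be replaced with your own plan.
+
+.. code-block::
+
+    IDC CIDR blocks routed to the VPC:   10.10.0.0/16, 10.20.0.0/16
+    VPC CIDR block:                      192.168.0.0/16
+    Node subnet CIDR block:              192.168.0.0/24
+    Container (pod) CIDR block:          172.16.0.0/16    # Must not overlap with the IDC CIDR blocks or the subnet CIDR block.
+    Service CIDR block:                  10.247.0.0/16    # Must not overlap with the subnet or container CIDR block.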
diff --git a/umn/source/best_practice/networking/selecting_a_network_model.rst b/umn/source/best_practice/networking/selecting_a_network_model.rst new file mode 100644 index 0000000..b48f95c --- /dev/null +++ b/umn/source/best_practice/networking/selecting_a_network_model.rst @@ -0,0 +1,70 @@ +:original_name: cce_bestpractice_00162.html + +.. _cce_bestpractice_00162: + +Selecting a Network Model +========================= + +CCE uses self-proprietary, high-performance container networking add-ons to support the tunnel network, Cloud Native Network 2.0, and VPC network models. + +.. caution:: + + After a cluster is created, the network model cannot be changed. Exercise caution when selecting a network model. + +- **Tunnel network**: The container network is an overlay tunnel network on top of a VPC network and uses the VXLAN technology. This network model is applicable when there is no high requirements on performance. VXLAN encapsulates Ethernet packets as UDP packets for tunnel transmission. Though at some cost of performance, the tunnel encapsulation enables higher interoperability and compatibility with advanced features (such as network policy-based isolation), meeting the requirements of most applications. + + + .. figure:: /_static/images/en-us_image_0000001145545261.png + :alt: **Figure 1** Container tunnel network + + **Figure 1** Container tunnel network + +- **VPC network**: The container network uses VPC routing to integrate with the underlying network. This network model is applicable to performance-intensive scenarios. The maximum number of nodes allowed in a cluster depends on the route quota in a VPC network. Each node is assigned a CIDR block of a fixed size. VPC networks are free from tunnel encapsulation overhead and outperform container tunnel networks. In addition, as VPC routing includes routes to node IP addresses and container network segment, container pods in the cluster can be directly accessed from outside the cluster. + + + .. figure:: /_static/images/en-us_image_0261818875.png + :alt: **Figure 2** VPC network + + **Figure 2** VPC network + +- **Cloud Native Network 2.0**: The container network deeply integrates the elastic network interface (ENI) capability of VPC, uses the VPC CIDR block to allocate container addresses, and supports passthrough networking to containers through a load balancer. + + + .. figure:: /_static/images/en-us_image_0000001352539924.png + :alt: **Figure 3** Cloud Native Network 2.0 + + **Figure 3** Cloud Native Network 2.0 + +The following table lists the differences between the network models. + +.. 
table:: **Table 1** Networking model comparison + + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + | Dimension | Tunnel Network | VPC Network | Cloud Native Network 2.0 | + +========================+===================================================================================================================================+======================================================================================================================================================+========================================================================================================+ + | Core technology | OVS | IPvlan and VPC route | VPC ENI/sub-ENI | + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + | Applicable Clusters | CCE cluster | CCE cluster | CCE Turbo cluster | + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + | Network isolation | Kubernetes native NetworkPolicy for pods | No | Pods support security group isolation. | + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + | Passthrough networking | No | No | Yes | + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + | IP address management | - The container CIDR block is allocated separately. | - The container CIDR block is allocated separately. | The container CIDR block is divided from the VPC subnet and does not need to be allocated separately. | + | | - CIDR blocks are divided by node and can be dynamically allocated (CIDR blocks can be dynamically added after being allocated.) | - CIDR blocks are divided by node and statically allocated (the CIDR block cannot be changed after a node is created). 
| | + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + | Performance | Performance loss due to VXLAN encapsulation | No tunnel encapsulation. Cross-node packets are forwarded through VPC routers, delivering performance equivalent to that of the host network. | The container network is integrated with the VPC network, eliminating performance loss. | + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + | Networking scale | A maximum of 2,000 nodes are supported. | By default, 200 nodes are supported. | A maximum of 2,000 nodes are supported. | + | | | | | + | | | Each time a node is added to the cluster, a route is added to the VPC routing table. Therefore, the cluster scale is limited by the VPC route table. | | + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + | Scenario | - Common container services | - Scenarios that have high requirements on network latency and bandwidth | - Scenarios that have high requirements on network latency, bandwidth, and performance | + | | - Scenarios that do not have high requirements on network latency and bandwidth | - Containers communicate with VMs using a microservice registration framework, such as Dubbo and CSE. | - Containers communicate with VMs using a microservice registration framework, such as Dubbo and CSE. | + +------------------------+-----------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------+ + +.. important:: + + #. The scale of a cluster that uses the VPC network model is limited by the custom routes of the VPC. Therefore, you need to estimate the number of required nodes before creating a cluster. + #. The scale of a cluster that uses the Cloud Native Network 2.0 model depends on the size of the VPC subnet CIDR block selected for the network attachment definition. Before creating a cluster, evaluate the scale of your cluster. + #. By default, VPC routing network supports direct communication between containers and hosts in the same VPC. If a peering connection policy is configured between the VPC and another VPC, the containers can directly communicate with hosts on the peer VPC. 
In addition, in hybrid networking scenarios such as Direct Connect and VPN, communication between containers and hosts on the peer end can also be achieved with proper planning. diff --git a/umn/source/best_practice/security/cluster_security.rst b/umn/source/best_practice/security/cluster_security.rst new file mode 100644 index 0000000..ea7b068 --- /dev/null +++ b/umn/source/best_practice/security/cluster_security.rst @@ -0,0 +1,173 @@ +:original_name: cce_bestpractice_0317.html + +.. _cce_bestpractice_0317: + +Cluster Security +================ + +For security purposes, you are advised to configure a cluster as follows. + +Using the CCE Cluster of the Latest Version +------------------------------------------- + +Kubernetes releases a major version in about four months. CCE follows the same frequency as Kubernetes to release major versions. To be specific, a new CCE version is released about three months after a new Kubernetes version is released in the community. For example, Kubernetes v1.19 was released in September 2020 and CCE v1.19 was released in March 2021. + +The latest cluster version has known vulnerabilities fixed or provides a more comprehensive security protection mechanism. You are advised to select the latest cluster version when creating a cluster. Before a cluster version is deprecated and removed, upgrade your cluster to a supported version. + +Disabling the Automatic Token Mounting Function of the Default Service Account +------------------------------------------------------------------------------ + +By default, Kubernetes associates the default service account with every pod. That is, the token is mounted to a container. The container can use this token to pass the authentication by the kube-apiserver and kubelet components. In a cluster with RBAC disabled, the service account who owns the token has the control permissions for the entire cluster. In a cluster with RBAC enabled, the permissions of the service account who owns the token depends on the roles associated by the administrator. The service account's token is generally used by workloads that need to access kube-apiserver, such as coredns, autoscaler, and prometheus. For workloads that do not need to access kube-apiserver, you are advised to disable the automatic association between the service account and token. + +Two methods are available: + +- Method 1: Set the **automountServiceAccountToken** field of the service account to **false**. After the configuration is complete, newly created workloads will not be associated with the default service account by default. Set this field for each namespace as required. + + .. code-block:: + + apiVersion: v1 + kind: ServiceAccount + metadata: + name: default + automountServiceAccountToken: false + ... + + When a workload needs to be associated with a service account, explicitly set the **automountServiceAccountToken** field to **true** in the YAML file of the workload. + + .. code-block:: + + ... + spec: + template: + spec: + serviceAccountName: default + automountServiceAccountToken: true + ... + +- Method 2: Explicitly disable the function of automatically associating with service accounts for workloads. + + .. code-block:: + + ... + spec: + template: + spec: + automountServiceAccountToken: false + ... + +Configuring Proper Cluster Access Permissions for Users +------------------------------------------------------- + +CCE allows you to create multiple IAM users. 
Your account can create different user groups, assign different access permissions to different user groups, and add users to the user groups with corresponding permissions when creating IAM users. In this way, users can control permissions on different regions and assign read-only permissions. Your account can also assign namespace-level permissions for users or user groups. To ensure security, it is advised that minimum user access permissions are assigned. + +If you need to create multiple IAM users, configure the permissions of the IAM users and namespaces properly. + +Configuring Resource Quotas for Cluster Namespaces +-------------------------------------------------- + +CCE provides resource quota management, which allows users to limit the total amount of resources that can be allocated to each namespace. These resources include CPU, memory, storage volumes, pods, Services, Deployments, and StatefulSets. Proper configuration can prevent excessive resources created in a namespace from affecting the stability of the entire cluster. + +Configuring LimitRange for Containers in a Namespace +---------------------------------------------------- + +With resource quotas, cluster administrators can restrict the use and creation of resources by namespace. In a namespace, a pod or container can use the maximum CPU and memory resources defined by the resource quota of the namespace. In this case, a pod or container may monopolize all available resources in the namespace. You are advised to configure LimitRange to restrict resource allocation within the namespace. The LimitRange parameter has the following restrictions: + +- Limits the minimum and maximum resource usage of each pod or container in a namespace. + + For example, create the maximum and minimum CPU usage limits for a pod in a namespace as follows: + + cpu-constraints.yaml + + .. code-block:: + + apiVersion: v1 + kind: LimitRange + metadata: + name: cpu-min-max-demo-lr + spec: + limits: + - max: + cpu: "800m" + min: + cpu: "200m" + type: Container + + Then, run **kubectl -n** ** **create -f** *cpu-constraints.yaml* to complete the creation. If the default CPU usage is not specified for the container, the platform automatically configures the default CPU usage. That is, the default configuration is automatically added after the container is created. + + .. code-block:: + + ... + spec: + limits: + - default: + cpu: 800m + defaultRequest: + cpu: 800m + max: + cpu: 800m + min: + cpu: 200m + type: Container + +- Limits the maximum and minimum storage space that each PersistentVolumeClaim can apply for in a namespace. + + storagelimit.yaml + + .. code-block:: + + apiVersion: v1 + kind: LimitRange + metadata: + name: storagelimit + spec: + limits: + - type: PersistentVolumeClaim + max: + storage: 2Gi + min: + storage: 1Gi + + Then, run **kubectl -n** ** **create -f** *storagelimit.yaml* to complete the creation. + +Configuring Network Isolation in a Cluster +------------------------------------------ + +- Container tunnel network + + If networks need to be isolated between namespaces in a cluster or between workloads in the same namespace, you can configure network policies to isolate the networks. + +- Cloud Native Network 2.0 + + In the Cloud Native Network 2.0 model, you can configure security groups to isolate networks between pods. For details, see `SecurityGroups `__. + +- VPC network + + Network isolation is not supported. 
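+
+For clusters using the container tunnel network model, isolation is implemented with standard Kubernetes NetworkPolicy objects. The following minimal sketch denies all ingress traffic to the pods in a namespace; allow rules can then be added as required. The namespace name is an example only.
+
+.. code-block::
+
+    apiVersion: networking.k8s.io/v1
+    kind: NetworkPolicy
+    metadata:
+      name: default-deny-ingress
+      namespace: example-ns      # Example namespace. Replace it with the namespace to be isolated.
+    spec:
+      podSelector: {}            # An empty selector matches all pods in the namespace.
+      policyTypes:
+      - Ingress                  # No ingress rules are defined, so all inbound traffic to these pods is denied.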
+ +Enabling the Webhook Authentication Mode with kubelet +----------------------------------------------------- + +.. important:: + + This configuration is required only for CCE clusters of v1.15.6-r1 or earlier. It is not required for versions later than v1.15.6-r1. + + Upgrade the CCE cluster to v1.13 or v1.15 and enable RBAC for the cluster. If the cluster is already of v1.13 or later, no upgrade is required. + +When creating a node, you can enable the kubelet webhook authentication mode by injecting the **postinstall** file (by setting the kubelet startup parameter **--authorization-mode=Webhook**). + +#. Run the following command to create a ClusterRoleBinding: + + **kubectl create clusterrolebinding kube-apiserver-kubelet-admin --clusterrole=system:kubelet-api-admin --user=system:kube-apiserver** + +#. For an existing node, log in to the node, change the **authorization mode** in **/var/paas/kubernetes/kubelet/kubelet_config.yaml** on the node to **Webhook**, and restart kubelet. + + **sed -i s/AlwaysAllow/Webhook/g /var/paas/kubernetes/kubelet/kubelet_config.yaml; systemctl restart kubelet** + +#. For a new node, add the following command to the post-installation script to change the kubelet authorization mode: + + **sed -i s/AlwaysAllow/Webhook/g /var/paas/kubernetes/kubelet/kubelet_config.yaml; systemctl restart kubelet** + +Uninstalling web-terminal After Use +----------------------------------- + +The web-terminal add-on can be used to manage CCE clusters. Keep the login password secure and uninstall the add-on when it is no longer needed. diff --git a/umn/source/best_practice/security/container_security.rst b/umn/source/best_practice/security/container_security.rst new file mode 100644 index 0000000..e16075e --- /dev/null +++ b/umn/source/best_practice/security/container_security.rst @@ -0,0 +1,131 @@ +:original_name: cce_bestpractice_0319.html + +.. _cce_bestpractice_0319: + +Container Security +================== + +Controlling the Pod Scheduling Scope +------------------------------------ + +Use nodeSelector or nodeAffinity to limit the range of nodes to which an application can be scheduled. This prevents an exception in a single application from threatening the entire cluster. + +Suggestions on Container Security Configuration +----------------------------------------------- + +- Set the computing resource limits (**request** and **limit**) of a container. This prevents the container from occupying too many resources and affecting the stability of the host and other containers on the same node. +- Unless necessary, do not mount sensitive host directories to containers, such as **/**, **/boot**, **/dev**, **/etc**, **/lib**, **/proc**, **/sys**, and **/usr**. +- Do not run the sshd process in containers unless necessary. +- Unless necessary, do not let containers and hosts share the network namespace. +- Unless necessary, do not let containers and hosts share the process namespace. +- Unless necessary, do not let containers and hosts share the IPC namespace. +- Unless necessary, do not let containers and hosts share the UTS namespace. +- Unless necessary, do not mount the sock file of Docker to any container. + +Container Permission Access Control +----------------------------------- + +When using a containerized application, follow the principle of least privilege and properly set the securityContext of Deployments or StatefulSets. + +- Configure runAsUser to specify a non-root user to run a container.
+ +- Configure privileged to prevent containers from running in privileged mode in scenarios where it is not required. + +- Configure capabilities to precisely control the privileged access permissions of containers. + +- Configure allowPrivilegeEscalation to disable privilege escalation in scenarios where container processes do not require it. + +- Configure seccomp to restrict the container syscalls. For details, see `Restrict a Container's Syscalls with seccomp `__ in the official Kubernetes documentation. + +- Configure ReadOnlyRootFilesystem to protect the root file system of a container. + + Example YAML for a Deployment: + + .. code-block:: + + apiVersion: apps/v1 + kind: Deployment + metadata: + name: security-context-example + namespace: security-example + spec: + replicas: 1 + selector: + matchLabels: + app: security-context-example + label: security-context-example + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + seccomp.security.alpha.kubernetes.io/pod: runtime/default + labels: + app: security-context-example + label: security-context-example + spec: + containers: + - image: ... + imagePullPolicy: Always + name: security-context-example + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 1000 + capabilities: + add: + - NET_BIND_SERVICE + drop: + - all + volumeMounts: + - mountPath: /etc/localtime + name: localtime + readOnly: true + - mountPath: /opt/write-file-dir + name: tmpfs-example-001 + securityContext: + seccompProfile: + type: RuntimeDefault + volumes: + - hostPath: + path: /etc/localtime + type: "" + name: localtime + - emptyDir: {} + name: tmpfs-example-001 + +Restricting the Access of Containers to the Management Plane +------------------------------------------------------------ + +If application containers on a node do not need to access Kubernetes, you can perform the following operations to prevent containers from accessing kube-apiserver: + +#. Query the container CIDR block and private API server address. + + On the **Clusters** page of the CCE console, click the name of the cluster to find the information on the details page. + +#. Log in to each node in the CCE cluster as user **root** and run the following command: + + - VPC network: + + .. code-block:: + + iptables -I OUTPUT -s {container_cidr} -d {Private API server IP} -j REJECT + + - Container tunnel network: + + .. code-block:: + + iptables -I FORWARD -s {container_cidr} -d {Private API server IP} -j REJECT + + *{container_cidr}* indicates the container CIDR block of the cluster, for example, 10.0.0.0/16, and *{Private API server IP}* indicates the private IP address of the API server obtained in the previous step. + + To ensure configuration persistence, you are advised to write the command to the **/etc/rc.local** script. + +#. Run the following command in a container to access kube-apiserver and check whether the request is intercepted: + + .. code-block:: + + curl -k https://{Private API server IP}:5443 diff --git a/umn/source/best_practice/security/index.rst b/umn/source/best_practice/security/index.rst new file mode 100644 index 0000000..ee55f6b --- /dev/null +++ b/umn/source/best_practice/security/index.rst @@ -0,0 +1,20 @@ +:original_name: cce_bestpractice_0315.html + +.. _cce_bestpractice_0315: + +Security +======== + +- :ref:`Cluster Security ` +- :ref:`Node Security ` +- :ref:`Container Security ` +- :ref:`Secret Security ` + +..
toctree:: + :maxdepth: 1 + :hidden: + + cluster_security + node_security + container_security + secret_security diff --git a/umn/source/best_practice/security/node_security.rst b/umn/source/best_practice/security/node_security.rst new file mode 100644 index 0000000..ae947b2 --- /dev/null +++ b/umn/source/best_practice/security/node_security.rst @@ -0,0 +1,89 @@ +:original_name: cce_bestpractice_0318.html + +.. _cce_bestpractice_0318: + +Node Security +============= + +Preventing Nodes from Being Exposed to Public Networks +------------------------------------------------------ + +- Do not bind an EIP to a node unless necessary to reduce the attack surface. +- If an EIP must be used, properly configure the firewall or security group rules to restrict access of unnecessary ports and IP addresses. + +You may have configured the **kubeconfig.json** file on a node in your cluster. kubectl can use the certificate and private key in this file to control the entire cluster. You are advised to delete unnecessary files from the **/root/.kube** directory on the node to prevent malicious use. + +rm -rf /root/.kube + +Hardening VPC Security Group Rules +---------------------------------- + +CCE is a universal container platform. Its default security group rules apply to common scenarios. Based on security requirements, you can harden the security group rules set for CCE clusters on the **Security Groups** page of **Network Console**. + +Hardening Nodes on Demand +------------------------- + +CCE cluster nodes use the default settings of open source OSs. After a node is created, you need to perform security hardening according to your service requirements. + +In CCE, you can perform hardening as follows: + +- Use the post-installation script after the node is created. For details, see the description about **Post-installation Script** in **Advanced Settings** when creating a node. This script is user-defined. + +Forbidding Containers to Obtain Host Machine Metadata +----------------------------------------------------- + +If a single CCE cluster is shared by multiple users to deploy containers, containers cannot access the management address (169.254.169.254) of OpenStack, preventing containers from obtaining metadata of host machines. + +For details about how to restore the metadata, see the "Notes" section in `Obtaining Metadata `__. + +.. warning:: + + This solution may affect the password change on the ECS console. Therefore, you must verify the solution before rectifying the fault. + +#. Obtain the network model and container CIDR of the cluster. + + On the **Clusters** page of the CCE console, view the network model and container CIDR of the cluster. + + |image1| + +#. Prevent the container from obtaining host metadata. + + - VPC network + + a. Log in to each node in the CCE cluster as user **root** and run the following command: + + .. code-block:: + + iptables -I OUTPUT -s {container_cidr} -d 169.254.169.254 -j REJECT + + *{container_cidr}* indicates the container CIDR of the cluster, for example, 10.0.0.0/16. + + To ensure configuration persistence, you are advised to write the command to the **/etc/rc.local** script. + + b. Run the following commands in the container to access the **userdata** and **metadata** interfaces of OpenStack and check whether the request is intercepted: + + .. code-block:: + + curl 169.254.169.254/openstack/latest/meta_data.json + curl 169.254.169.254/openstack/latest/user_data + + - Container tunnel network + + a. 
Log in to each node in the CCE cluster as user **root** and run the following command: + + .. code-block:: + + iptables -I FORWARD -s {container_cidr} -d 169.254.169.254 -j REJECT + + *{container_cidr}* indicates the container CIDR of the cluster, for example, 10.0.0.0/16. + + To ensure configuration persistence, you are advised to write the command to the **/etc/rc.local** script. + + b. Run the following commands in the container to access the **userdata** and **metadata** interfaces of OpenStack and check whether the request is intercepted: + + .. code-block:: + + curl 169.254.169.254/openstack/latest/meta_data.json + curl 169.254.169.254/openstack/latest/user_data + +.. |image1| image:: /_static/images/en-us_image_0000001226818003.png diff --git a/umn/source/best_practice/security/secret_security.rst b/umn/source/best_practice/security/secret_security.rst new file mode 100644 index 0000000..cd48f03 --- /dev/null +++ b/umn/source/best_practice/security/secret_security.rst @@ -0,0 +1,122 @@ +:original_name: cce_bestpractice_0320.html + +.. _cce_bestpractice_0320: + +Secret Security +=============== + +Currently, CCE has configured static encryption for secret resources. The secrets created by users will be encrypted and stored in etcd of the CCE cluster. Secrets can be used in two modes: environment variable and file mounting. No matter which mode is used, CCE still delivers the configured secret data to the containers that use it. Therefore, it is recommended that you: + +#. Do not record sensitive information in logs. + +#. For a secret that uses the file mounting mode, the default permission of the file mapped into the container is 0644. Configure stricter permissions for the file. For example: + + .. code-block:: + + apiVersion: v1 + kind: Pod + metadata: + name: mypod + spec: + containers: + - name: mypod + image: redis + volumeMounts: + - name: foo + mountPath: "/etc/foo" + volumes: + - name: foo + secret: + secretName: mysecret + defaultMode: 256 + + In **defaultMode: 256**, **256** is a decimal number, which corresponds to the octal number **0400**. + +#. When the file mounting mode is used, start the secret file name with a period (.) to hide the file in the container, as shown in the following example. + + .. code-block:: + + apiVersion: v1 + kind: Secret + metadata: + name: dotfile-secret + data: + .secret-file: dmFsdWUtMg0KDQo= + --- + apiVersion: v1 + kind: Pod + metadata: + name: secret-dotfiles-pod + spec: + volumes: + - name: secret-volume + secret: + secretName: dotfile-secret + containers: + - name: dotfile-test-container + image: k8s.gcr.io/busybox + command: + - ls + - "-1" + - "/etc/secret-volume" + volumeMounts: + - name: secret-volume + readOnly: true + mountPath: "/etc/secret-volume" + + In this way, **.secret-file** cannot be viewed by running the **ls -l** command in the **/etc/secret-volume/** directory, but can be viewed by running the **ls -al** command. + +#. Encrypt sensitive information before creating a secret and decrypt the information when using it. + +Using a Bound ServiceAccount Token to Access a Cluster +------------------------------------------------------ + +A secret-based ServiceAccount token does not support an expiration time or automatic rotation. In addition, after the pod that mounts the token is deleted, the token is still stored in the secret. Token leakage may incur security risks. A bound ServiceAccount token is recommended for CCE clusters of version 1.23 or later. In this mode, an expiration time can be set and the token lifecycle is bound to the pod lifecycle, reducing token leakage risks. Example: + +..
code-block:: + + apiVersion: apps/v1 + kind: Deployment + metadata: + name: security-token-example + namespace: security-example + spec: + replicas: 1 + selector: + matchLabels: + app: security-token-example + label: security-token-example + template: + metadata: + annotations: + seccomp.security.alpha.kubernetes.io/pod: runtime/default + labels: + app: security-token-example + label: security-token-example + spec: + serviceAccountName: test-sa + containers: + - image: ... + imagePullPolicy: Always + name: security-token-example + volumes: + - name: test-projected + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + expirationSeconds: 1800 + path: token + - configMap: + items: + - key: ca.crt + path: ca.crt + name: kube-root-ca.crt + - downwardAPI: + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + path: namespace + +For details, visit https://kubernetes.io/docs/reference/access-authn-authz/service-accounts-admin/. diff --git a/umn/source/best_practice/storage/custom_storage_classes.rst b/umn/source/best_practice/storage/custom_storage_classes.rst new file mode 100644 index 0000000..b3079ee --- /dev/null +++ b/umn/source/best_practice/storage/custom_storage_classes.rst @@ -0,0 +1,326 @@ +:original_name: cce_bestpractice_00281_0.html + +.. _cce_bestpractice_00281_0: + +Custom Storage Classes +====================== + +Challenges +---------- + +When using storage resources in CCE, the most common method is to specify **storageClassName** to define the type of storage resources to be created when creating a PVC. The following configuration shows how to use a PVC to apply for an SAS (high I/O) EVS disk (block storage). + +.. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: pvc-evs-example + namespace: default + annotations: + everest.io/disk-volume-type: SAS + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: csi-disk + +If you need to specify the EVS disk type, you can set the **everest.io/disk-volume-type** field. The value **SAS** is used as an example here, indicating the high I/O EVS disk type. Or you can choose **SATA** (common I/O) and **SSD** (ultra-high I/O). + +This configuration method may not work if you want to: + +- Set **storageClassName** only, which is simpler than specifying the EVS disk type by using **everest.io/disk-volume-type**. +- Avoid modifying YAML files or Helm charts. Some users switch from self-built or other Kubernetes services to CCE and have written YAML files of many applications. In these YAML files, different types of storage resources are specified by different StorageClassNames. When using CCE, they need to modify a large number of YAML files or Helm charts to use storage resources, which is labor-consuming and error-prone. +- Set the default **storageClassName** for all applications to use the default storage class. In this way, you can create storage resources of the default type without needing to specify **storageClassName** in the YAML file. + +Solution +-------- + +This section describes how to set a custom storage class in CCE and how to set the default storage class. You can specify different types of storage resources by setting **storageClassName**. + +- For the first scenario, you can define custom storageClassNames for SAS and SSD EVS disks. For example, define a storage class named **csi-disk-sas** for creating SAS disks. The following figure shows the differences before and after you use a custom storage class. 
+ + |image1| + +- For the second scenario, you can define a storage class with the same name as that in the existing YAML file without needing to modify **storageClassName** in the YAML file. + +- For the third scenario, you can set the default storage class as described below to create storage resources without specifying **storageClassName** in YAML files. + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: pvc-evs-example + namespace: default + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + +Storage Classes in CCE +---------------------- + +Run the following command to query the supported storage classes. + +.. code-block:: + + # kubectl get sc + NAME PROVISIONER AGE + csi-disk everest-csi-provisioner 17d # Storage class for EVS disks + csi-disk-topology everest-csi-provisioner 17d # Storage class for EVS disks with delayed association + csi-nas everest-csi-provisioner 17d # Storage class for SFS file systems + csi-obs everest-csi-provisioner 17d # Storage Class for OBS buckets + csi-sfsturbo everest-csi-provisioner 17d # Storage class for SFS Turbo file systems + +Check the details of **csi-disk**. You can see that the type of the disk created by **csi-disk** is SAS by default. + +.. code-block:: + + # kubectl get sc csi-disk -oyaml + allowVolumeExpansion: true + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + creationTimestamp: "2021-03-17T02:10:32Z" + name: csi-disk + resourceVersion: "760" + selfLink: /apis/storage.k8s.io/v1/storageclasses/csi-disk + uid: 4db97b6c-853b-443d-b0dc-41cdcb8140f2 + parameters: + csi.storage.k8s.io/csi-driver-name: disk.csi.everest.io + csi.storage.k8s.io/fstype: ext4 + everest.io/disk-volume-type: SAS + everest.io/passthrough: "true" + provisioner: everest-csi-provisioner + reclaimPolicy: Delete + volumeBindingMode: Immediate + + +Custom Storage Classes +---------------------- + +You can customize a high I/O storage class in a YAML file. For example, the name **csi-disk-sas** indicates that the disk type is SAS (high I/O). + +.. code-block:: + + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: csi-disk-sas # Name of the high I/O storage class, which can be customized. + parameters: + csi.storage.k8s.io/csi-driver-name: disk.csi.everest.io + csi.storage.k8s.io/fstype: ext4 + everest.io/disk-volume-type: SAS # High I/O EVS disk type, which cannot be customized. + everest.io/passthrough: "true" + provisioner: everest-csi-provisioner + reclaimPolicy: Delete + volumeBindingMode: Immediate + allowVolumeExpansion: true # true indicates that capacity expansion is allowed. + +For an ultra-high I/O storage class, you can set the class name to **csi-disk-ssd** to create SSD EVS disk (ultra-high I/O). + +.. code-block:: + + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: csi-disk-ssd # Name of the ultra-high I/O storage class, which can be customized. + parameters: + csi.storage.k8s.io/csi-driver-name: disk.csi.everest.io + csi.storage.k8s.io/fstype: ext4 + everest.io/disk-volume-type: SSD # Ultra-high I/O EVS disk type, which cannot be customized. + everest.io/passthrough: "true" + provisioner: everest-csi-provisioner + reclaimPolicy: Delete + volumeBindingMode: Immediate + allowVolumeExpansion: true + +**reclaimPolicy**: indicates the recycling policies of the underlying cloud storage. The value can be **Delete** or **Retain**. + +- **Delete**: When a PVC is deleted, both the PV and the EVS disk are deleted. 
+- **Retain**: When a PVC is deleted, the PV and underlying storage resources are not deleted. Instead, you must manually delete these resources. After that, the PV resource is in the **Released** state and cannot be bound to the PVC again. + +.. note:: + + The reclamation policy set here has no impact on the SFS Turbo storage. Therefore, the yearly/monthly SFS Turbo resources will not be reclaimed when the cluster or PVC is deleted. + +If high data security is required, you are advised to select **Retain** to prevent data from being deleted by mistake. + +After the definition is complete, run the **kubectl create** commands to create storage resources. + +.. code-block:: + + # kubectl create -f sas.yaml + storageclass.storage.k8s.io/csi-disk-sas created + # kubectl create -f ssd.yaml + storageclass.storage.k8s.io/csi-disk-ssd created + +Query the storage class again. Two more types of storage classes are displayed in the command output, as shown below. + +.. code-block:: + + # kubectl get sc + NAME PROVISIONER AGE + csi-disk everest-csi-provisioner 17d + csi-disk-sas everest-csi-provisioner 2m28s + csi-disk-ssd everest-csi-provisioner 16s + csi-disk-topology everest-csi-provisioner 17d + csi-nas everest-csi-provisioner 17d + csi-obs everest-csi-provisioner 17d + csi-sfsturbo everest-csi-provisioner 17d + +Other types of storage resources can be defined in the similar way. You can use kubectl to obtain the YAML file and modify it as required. + +- File storage + + .. code-block:: + + # kubectl get sc csi-nas -oyaml + kind: StorageClass + apiVersion: storage.k8s.io/v1 + metadata: + name: csi-nas + provisioner: everest-csi-provisioner + parameters: + csi.storage.k8s.io/csi-driver-name: nas.csi.everest.io + csi.storage.k8s.io/fstype: nfs + everest.io/share-access-level: rw + everest.io/share-access-to: 5e3864c6-e78d-4d00-b6fd-de09d432c632 # ID of the VPC to which the cluster belongs + everest.io/share-is-public: 'false' + everest.io/zone: xxxxx # AZ + reclaimPolicy: Delete + allowVolumeExpansion: true + volumeBindingMode: Immediate + +- Object storage + + .. code-block:: + + # kubectl get sc csi-obs -oyaml + kind: StorageClass + apiVersion: storage.k8s.io/v1 + metadata: + name: csi-obs + provisioner: everest-csi-provisioner + parameters: + csi.storage.k8s.io/csi-driver-name: obs.csi.everest.io + csi.storage.k8s.io/fstype: s3fs # Object storage type. s3fs indicates an object bucket, and obsfs indicates a parallel file system. + everest.io/obs-volume-type: STANDARD # Storage class of the OBS bucket + reclaimPolicy: Delete + volumeBindingMode: Immediate + +Setting a Default Storage Class +------------------------------- + +You can specify a storage class as the default class. In this way, if you do not specify **storageClassName** when creating a PVC, the PVC is created using the default storage class. + +For example, to specify **csi-disk-ssd** as the default storage class, edit your YAML file as follows: + +.. code-block:: + + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: csi-disk-ssd + annotations: + storageclass.kubernetes.io/is-default-class: "true" # Specifies the default storage class in a cluster. A cluster can have only one default storage class. 
parameters: + csi.storage.k8s.io/csi-driver-name: disk.csi.everest.io + csi.storage.k8s.io/fstype: ext4 + everest.io/disk-volume-type: SSD + everest.io/passthrough: "true" + provisioner: everest-csi-provisioner + reclaimPolicy: Delete + volumeBindingMode: Immediate + allowVolumeExpansion: true + +Delete the existing **csi-disk-ssd** storage class, run the **kubectl create** command to create it again from the updated YAML file, and then query the storage classes. The following information is displayed. + +.. code-block:: + + # kubectl delete sc csi-disk-ssd + storageclass.storage.k8s.io "csi-disk-ssd" deleted + # kubectl create -f ssd.yaml + storageclass.storage.k8s.io/csi-disk-ssd created + # kubectl get sc + NAME PROVISIONER AGE + csi-disk everest-csi-provisioner 17d + csi-disk-sas everest-csi-provisioner 114m + csi-disk-ssd (default) everest-csi-provisioner 9s + csi-disk-topology everest-csi-provisioner 17d + csi-nas everest-csi-provisioner 17d + csi-obs everest-csi-provisioner 17d + csi-sfsturbo everest-csi-provisioner 17d + +Verification +------------ + +- Use **csi-disk-sas** to create a PVC. + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: sas-disk + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: csi-disk-sas + + Create the PVC and view its details. As shown below, the PVC is created successfully and the value of **STORAGECLASS** is **csi-disk-sas**. + + .. code-block:: + + # kubectl create -f sas-disk.yaml + persistentvolumeclaim/sas-disk created + # kubectl get pvc + NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE + sas-disk Bound pvc-6e2f37f9-7346-4419-82f7-b42e79f7964c 10Gi RWO csi-disk-sas 24s + # kubectl get pv + NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE + pvc-6e2f37f9-7346-4419-82f7-b42e79f7964c 10Gi RWO Delete Bound default/sas-disk csi-disk-sas 30s + + View the PVC details on the CCE console. On the PV details page, you can see that the disk type is high I/O. + +- If **storageClassName** is not specified, the default configuration is used, as shown below. + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ssd-disk + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + + Create the PVC and view its details. You can see that the storage class of the PVC **ssd-disk** is **csi-disk-ssd**, indicating that **csi-disk-ssd** is used by default. + + .. code-block:: + + # kubectl create -f ssd-disk.yaml + persistentvolumeclaim/ssd-disk created + # kubectl get pvc + NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE + sas-disk Bound pvc-6e2f37f9-7346-4419-82f7-b42e79f7964c 10Gi RWO csi-disk-sas 16m + ssd-disk Bound pvc-4d2b059c-0d6c-44af-9994-f74d01c78731 10Gi RWO csi-disk-ssd 10s + # kubectl get pv + NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE + pvc-4d2b059c-0d6c-44af-9994-f74d01c78731 10Gi RWO Delete Bound default/ssd-disk csi-disk-ssd 15s + pvc-6e2f37f9-7346-4419-82f7-b42e79f7964c 10Gi RWO Delete Bound default/sas-disk csi-disk-sas 17m + + View the PVC details on the CCE console. On the PV details page, you can see that the disk type is ultra-high I/O. + +..
|image1| image:: /_static/images/en-us_image_0000001102275444.png diff --git a/umn/source/best_practice/storage/dynamically_creating_and_mounting_subdirectories_of_an_sfs_turbo_file_system.rst b/umn/source/best_practice/storage/dynamically_creating_and_mounting_subdirectories_of_an_sfs_turbo_file_system.rst new file mode 100644 index 0000000..46a70e6 --- /dev/null +++ b/umn/source/best_practice/storage/dynamically_creating_and_mounting_subdirectories_of_an_sfs_turbo_file_system.rst @@ -0,0 +1,251 @@ +:original_name: cce_bestpractice_00253_0.html + +.. _cce_bestpractice_00253_0: + +Dynamically Creating and Mounting Subdirectories of an SFS Turbo File System +============================================================================ + +Background +---------- + +The minimum capacity of an SFS Turbo file system is 500 GB, and the SFS Turbo file system cannot be billed by usage. By default, the root directory of an SFS Turbo file system is mounted to a container which, in most case, does not require such a large capacity. + +The everest add-on allows you to dynamically create subdirectories in an SFS Turbo file system and mount these subdirectories to containers. In this way, an SFS Turbo file system can be shared by multiple containers to increase storage efficiency. + +Notes and Constraints +--------------------- + +- Only clusters of v1.15 and later are supported. +- The cluster must use the everest add-on of version 1.1.13 or later. +- Kata containers are not supported. +- A maximum of 10 PVCs can be created concurrently at a time by using the subdirectory function. + +Creating an SFS Turbo Volume of the subpath Type +------------------------------------------------ + +.. caution:: + + The CCE console has not yet supported the operations related to this feature, such as expanding, disassociating, and deleting subPath volumes. + +#. Import an SFS Turbo file system that is located in the same VPC and subnet as the cluster. + +#. Create a StorageClass YAML file, for example, **sfsturbo-sc-test.yaml**. + + Configuration example: + + .. code-block:: + + apiVersion: storage.k8s.io/v1 + allowVolumeExpansion: true + kind: StorageClass + metadata: + name: sfsturbo-sc-test + mountOptions: + - lock + parameters: + csi.storage.k8s.io/csi-driver-name: sfsturbo.csi.everest.io + csi.storage.k8s.io/fstype: nfs + everest.io/archive-on-delete: "true" + everest.io/share-access-to: 7ca2dba2-1234-1234-1234-626371a8fb3a + everest.io/share-expand-type: bandwidth + everest.io/share-export-location: 192.168.1.1:/sfsturbo/ + everest.io/share-source: sfs-turbo + everest.io/share-volume-type: STANDARD + everest.io/volume-as: subpath + everest.io/volume-id: 0d773f2e-1234-1234-1234-de6a35074696 + provisioner: everest-csi-provisioner + reclaimPolicy: Delete + volumeBindingMode: Immediate + + In this example: + + - **name**: name of the StorageClass. + - **mountOptions**: mount options. This field is optional. + + - In versions later than everest 1.1.13 and earlier than everest 1.2.8, only the **nolock** parameter can be configured. By default, the **nolock** parameter is used for the mount operation and does not need to be configured. If **nolock** is set to **false**, the **lock** field is used. + + - Starting from everest 1.2.8, more parameters are supported. The default parameter configurations are shown below. For details, see `Setting Mount Options `__. **Do not set nolock to true. Otherwise, the mount operation fails.** + + .. 
code-block:: + + mountOptions: + - vers=3 + - timeo=600 + - nolock + - hard + + - **everest.io/volume-as**: Set this parameter to **subpath**. + - **everest.io/share-access-to**: This parameter is optional. In subpath mode, set this parameter to the ID of the VPC where the SFS Turbo file system is located. + - **everest.io/share-expand-type**: This parameter is optional. If the type of the SFS Turbo file system is SFS Turbo Standard - Enhanced or SFS Turbo Performance - Enhanced, set this parameter to **bandwidth**. + - **everest.io/share-export-location**: root directory to be mounted. It consists of the SFS Turbo shared path and sub-directory. The shared path can be queried on the SFS Turbo console. The sub-directory is user-defined. The PVCs created by the StorageClass are located in the sub-directory. + - **everest.io/share-volume-type**: This parameter is optional. It specifies the SFS Turbo file system type. The value can be **STANDARD** or **PERFORMANCE**. For enhanced types, this parameter must be used together with **everest.io/share-expand-type** (whose value should be **bandwidth**). + - **everest.io/zone**: This parameter is optional. Set it to the AZ where the SFS Turbo file system is located. + - **everest.io/volume-id**: ID of the SFS Turbo volume. You can query the volume ID on the SFS Turbo page. + - **everest.io/archive-on-delete**: If this parameter is set to **true** and the recycling policy is set to **Delete**, the original PV file will be archived when the PVC is deleted. The archive directory is named in the format of *archived-$PV name.timestamp*. If this parameter is set to **false**, the SFS Turbo sub-directory corresponding to the PV will be deleted. The default value is **true**. + +3. Run the **kubectl create -f sfsturbo-sc-test.yaml** command to create a StorageClass. + +4. Create a PVC YAML file named **sfs-turbo-test.yaml**. + + Configuration example: + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: sfs-turbo-test + namespace: default + spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 50Gi + storageClassName: sfsturbo-sc-test + volumeMode: Filesystem + + In this example: + + - **name**: name of the PVC. + - **storageClassName**: name of the StorageClass created in the previous step. + - **storage**: In the subpath mode, this parameter is invalid. The storage capacity is limited by the total capacity of the SFS Turbo file system. If the total capacity of the SFS Turbo file system is insufficient, expand the capacity on the SFS Turbo page in a timely manner. + +5. Run the **kubectl create -f sfs-turbo-test.yaml** command to create a PVC. + +.. note:: + + It is meaningless to conduct capacity expansion on an SFS Turbo volume created in the subpath mode. This operation does not expand the capacity of the SFS Turbo file system. You need to ensure that the total capacity of the SFS Turbo file system is not used up. + +Creating a Deployment and Mounting an Existing Volume +----------------------------------------------------- + +#. Create a Deployment YAML file named **deployment-test.yaml**. + + Configuration example: + + .. 
code-block:: + + apiVersion: apps/v1 + kind: Deployment + metadata: + name: test-turbo-subpath-example + namespace: default + generation: 1 + labels: + appgroup: '' + spec: + replicas: 1 + selector: + matchLabels: + app: test-turbo-subpath-example + template: + metadata: + labels: + app: test-turbo-subpath-example + spec: + containers: + - image: nginx:latest + name: container-0 + volumeMounts: + - mountPath: /tmp + name: pvc-sfs-turbo-example + restartPolicy: Always + imagePullSecrets: + - name: default-secret + volumes: + - name: pvc-sfs-turbo-example + persistentVolumeClaim: + claimName: sfs-turbo-test + + In this example: + + - **name**: name of the Deployment. + - **image**: image used by the Deployment. + - **mountPath**: mount path of the container. In this example, the volume is mounted to the **/tmp** directory. + - **claimName**: name of an existing PVC. + +2. Run the **kubectl create -f deployment-test.yaml** command to create a Deployment. + +Creating a StatefulSet That Uses a Volume Dynamically Created in subpath Mode +----------------------------------------------------------------------------- + +#. Create a StatefulSet YAML file named **statefulset-test.yaml**. + + Configuration example: + + .. code-block:: + + apiVersion: apps/v1 + kind: StatefulSet + metadata: + name: test-turbo-subpath + namespace: default + generation: 1 + labels: + appgroup: '' + spec: + replicas: 2 + selector: + matchLabels: + app: test-turbo-subpath + template: + metadata: + labels: + app: test-turbo-subpath + annotations: + metrics.alpha.kubernetes.io/custom-endpoints: '[{"api":"","path":"","port":"","names":""}]' + pod.alpha.kubernetes.io/initialized: 'true' + spec: + containers: + - name: container-0 + image: 'nginx:latest' + env: + - name: PAAS_APP_NAME + value: deploy-sfs-nfs-rw-in + - name: PAAS_NAMESPACE + value: default + - name: PAAS_PROJECT_ID + value: 8190a2a1692c46f284585c56fc0e2fb9 + resources: {} + volumeMounts: + - name: sfs-turbo-160024548582479676 + mountPath: /tmp + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + restartPolicy: Always + terminationGracePeriodSeconds: 30 + dnsPolicy: ClusterFirst + securityContext: {} + imagePullSecrets: + - name: default-secret + affinity: {} + schedulerName: default-scheduler + volumeClaimTemplates: + - metadata: + name: sfs-turbo-160024548582479676 + namespace: default + annotations: {} + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: sfsturbo-sc-test + serviceName: wwww + podManagementPolicy: OrderedReady + updateStrategy: + type: RollingUpdate + revisionHistoryLimit: 10 + + In this example: + + - **name**: name of the StatefulSet. + - **image**: image used by the StatefulSet. + - **mountPath**: mount path of the container. In this example, the volume is mounted to the **/tmp** directory. + - **spec.template.spec.containers.volumeMounts.name** and **spec.volumeClaimTemplates.metadata.name** must be consistent because they have a mapping relationship. + - **storageClassName**: name of the created StorageClass. + +2. Run the **kubectl create -f statefulset-test.yaml** command to create a StatefulSet. 
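After the StatefulSet is created, a quick check similar to the following can confirm that a subpath PVC has been dynamically created and bound for each replica. The resource names below are the ones used in the preceding example; adjust them to your own configuration.

.. code-block::

   # One PVC per replica is created from volumeClaimTemplates and should be in the Bound state.
   kubectl get pvc -n default | grep sfs-turbo-160024548582479676
   # The pods should be running with the subpath volume mounted to /tmp.
   kubectl get pod -n default -l app=test-turbo-subpath
   kubectl exec -n default test-turbo-subpath-0 -- df -h /tmp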
diff --git a/umn/source/best_practice/storage/expanding_node_disk_capacity.rst b/umn/source/best_practice/storage/expanding_node_disk_capacity.rst new file mode 100644 index 0000000..c18b43d --- /dev/null +++ b/umn/source/best_practice/storage/expanding_node_disk_capacity.rst @@ -0,0 +1,88 @@ +:original_name: cce_bestpractice_00198.html + +.. _cce_bestpractice_00198: + +Expanding Node Disk Capacity +============================ + +System Disk +----------- + +#. Expand the capacity of the system disk on the EVS console. +#. Restart the node on the ECS console. +#. Log in to the CCE console and click the cluster. In the navigation pane, choose **Nodes**. Click **More** > **Sync Server Data** at the row containing the target node. + +Node Data Disk (Dedicated for Docker) +------------------------------------- + +#. Expand the capacity of the data disk on the EVS console. + +#. Log in to the CCE console and click the cluster. In the navigation pane, choose **Nodes**. Click **More** > **Sync Server Data** at the row containing the target node. + +#. Log in to the target node. + +#. Run the **lsblk** command to check the block device information of the node. + + A data disk is divided depending on the container storage **Rootfs**: + + - Overlayfs: No independent thin pool is allocated. Image data is stored in the **dockersys** disk. + + .. code-block:: + + # lsblk + NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT + sda 8:0 0 50G 0 disk + └─sda1 8:1 0 50G 0 part / + sdb 8:16 0 200G 0 disk + ├─vgpaas-dockersys 253:0 0 90G 0 lvm /var/lib/docker # Space used by Docker. + └─vgpaas-kubernetes 253:1 0 10G 0 lvm /mnt/paas/kubernetes/kubelet # Space used by Kubernetes. + + Run the following commands on the node to add the new disk capacity to the **dockersys** disk: + + .. code-block:: + + pvresize /dev/sdb + lvextend -l+100%FREE -n vgpaas/dockersys + resize2fs /dev/vgpaas/dockersys + + - Devicemapper: A thin pool is allocated to store image data. + + .. code-block:: + + # lsblk + NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT + sda 8:0 0 50G 0 disk + └─sda1 8:1 0 50G 0 part / + sdb 8:16 0 200G 0 disk + ├─vgpaas-dockersys 253:0 0 18G 0 lvm /var/lib/docker + ├─vgpaas-thinpool_tmeta 253:1 0 3G 0 lvm + │ └─vgpaas-thinpool 253:3 0 67G 0 lvm # Thin pool space. + │ ... + ├─vgpaas-thinpool_tdata 253:2 0 67G 0 lvm + │ └─vgpaas-thinpool 253:3 0 67G 0 lvm + │ ... + └─vgpaas-kubernetes 253:4 0 10G 0 lvm /mnt/paas/kubernetes/kubelet + + Run the following commands on the node to add the new disk capacity to the **thinpool** disk: + + .. code-block:: + + pvresize /dev/sdb + lvextend -l+100%FREE -n vgpaas/thinpool + +Node Data Disk (Kubernetes) +--------------------------- + +#. Expand the capacity of the data disk on the EVS console. + +#. Log in to the CCE console and click the cluster. In the navigation pane, choose **Nodes**. Click **More** > **Sync Server Data** at the row containing the target node. + +#. Log in to the target node. + +#. Run the following commands on the node to add the new disk capacity to the Kubernetes disk: + + .. 
code-block:: + + pvresize /dev/sdb + lvextend -l+100%FREE -n vgpaas/kubernetes + resize2fs /dev/vgpaas/kubernetes diff --git a/umn/source/reference/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst b/umn/source/best_practice/storage/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst similarity index 91% rename from umn/source/reference/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst rename to umn/source/best_practice/storage/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst index 06f32a0..ea004c8 100644 --- a/umn/source/reference/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst +++ b/umn/source/best_practice/storage/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst @@ -5,7 +5,7 @@ How Do I Change the Storage Class Used by a Cluster of v1.15 from FlexVolume to CSI Everest? ============================================================================================ -For clusters of v1.15.11-r1 and later, the CSI everest add-on has taken over all functions of the fuxi FlexVolume driver (the storage-driver add-on) for container storage management. In versions later than 1.17.9-r0, the fuxi FlexVolume driver (storage-driver) is no longer supported. +In clusters later than v1.15.11-r1, CSI (the everest add-on) has taken over all functions of fuxi FlexVolume (the storage-driver add-on) for managing container storage. You are advised to use CSI Everest. To migrate your storage volumes, create a static PV to associate with the original underlying storage, and then create a PVC to associate with this static PV. When you upgrade your application, mount the new PVC to the original mounting path to migrate the storage volumes. @@ -18,8 +18,6 @@ Procedure #. (Optional) Back up data to prevent data loss in case of exceptions. -#. Run kubectl commands. - #. .. _cce_bestpractice_0107__li1219802032512: Configure a YAML file of the PV in the CSI format according to the PV in the FlexVolume format and associate the PV with the existing storage. @@ -39,7 +37,7 @@ Procedure metadata: labels: failure-domain.beta.kubernetes.io/region: eu-de - failure-domain.beta.kubernetes.io/zone: eu-de-01 + failure-domain.beta.kubernetes.io/zone: annotations: pv.kubernetes.io/provisioned-by: everest-csi-provisioner name: pv-evs-example @@ -53,7 +51,7 @@ Procedure fsType: ext4 volumeAttributes: everest.io/disk-mode: SCSI - everest.io/disk-volume-type: SATA + everest.io/disk-volume-type: SAS storage.kubernetes.io/csiProvisionerIdentity: everest-csi-provisioner volumeHandle: 0992dbda-6340-470e-a74e-4f0db288ed82 persistentVolumeReclaimPolicy: Delete @@ -63,27 +61,27 @@ Procedure .. 
table:: **Table 1** EVS volume configuration parameters - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Parameter | Description | - +==========================================+==========================================================================================================================================================================================================================================+ - | failure-domain.beta.kubernetes.io/region | Region where the EVS disk is located. Use the same value as that of the FlexVolume PV. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | failure-domain.beta.kubernetes.io/zone | AZ where the EVS disk is located. Use the same value as that of the FlexVolume PV. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | name | Name of the PV, which must be unique in the cluster. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | storage | EVS volume capacity in the unit of Gi. Use the value of **spec.capacity.storage** of the FlexVolume PV. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | driver | Storage driver used to attach the volume. Set the driver to **disk.csi.everest.io** for the EVS volume. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeHandle | Volume ID of the EVS disk. Use the value of **spec.flexVolume.options.volumeID** of the FlexVolume PV. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | everest.io/disk-mode | EVS disk mode. Use the value of **spec.flexVolume.options.disk-mode** of the FlexVolume PV. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | everest.io/disk-volume-type | EVS disk type. 
Currently, high I/O (SAS), ultra-high I/O (SSD), and common I/O (SATA) are supported. Use the value of **kubernetes.io/volumetype** in the storage class corresponding to **spec.storageClassName** of the FlexVolume PV. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | storageClassName | Name of the Kubernetes storage class associated with the storage volume. Set this field to **csi-disk** for EVS disks. | - +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | Parameter | Description | + +==========================================+====================================================================================================================================================+ + | failure-domain.beta.kubernetes.io/region | Region where the EVS disk is located. Use the same value as that of the FlexVolume PV. | + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | failure-domain.beta.kubernetes.io/zone | AZ where the EVS disk is located. Use the same value as that of the FlexVolume PV. | + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | name | Name of the PV, which must be unique in the cluster. | + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | storage | EVS volume capacity in the unit of Gi. Use the value of **spec.capacity.storage** of the FlexVolume PV. | + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | driver | Storage driver used to attach the volume. Set the driver to **disk.csi.everest.io** for the EVS volume. | + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | volumeHandle | Volume ID of the EVS disk. Use the value of **spec.flexVolume.options.volumeID** of the FlexVolume PV. | + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | everest.io/disk-mode | EVS disk mode. Use the value of **spec.flexVolume.options.disk-mode** of the FlexVolume PV. 
| + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | everest.io/disk-volume-type | EVS disk type. Use the value of **kubernetes.io/volumetype** in the storage class corresponding to **spec.storageClassName** of the FlexVolume PV. | + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ + | storageClassName | Name of the Kubernetes storage class associated with the storage volume. Set this field to **csi-disk** for EVS disks. | + +------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------+ Configuration example of **a PV for an SFS volume**: @@ -104,7 +102,7 @@ Procedure driver: nas.csi.everest.io fsType: nfs volumeAttributes: - everest.io/share-export-location: sfs-nas01.Endpoint:/share-436304e8 + everest.io/share-export-location: # Shared path of the file storage storage.kubernetes.io/csiProvisionerIdentity: everest-csi-provisioner volumeHandle: 682f00bb-ace0-41d8-9b3e-913c9aa6b695 persistentVolumeReclaimPolicy: Delete @@ -165,7 +163,7 @@ Procedure +============================+===========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================+ | name | Name of the PV, which must be unique in the cluster. | +----------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | storage | Storage capacity in the unit of Gi. Set this parameter to the fixed value **1Gi**. | + | storage | Storage capacity, in the unit of Gi. Set this parameter to the fixed value **1Gi**. | +----------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | driver | Storage driver used to attach the volume. Set the driver to **obs.csi.everest.io** for the OBS volume. 
| +----------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -227,7 +225,7 @@ Procedure #. .. _cce_bestpractice_0107__li1710710385418: - Configure a YAML file of the PVC in the CSI format according to the PVC in the FlexVolume format and associate the PVC with the PV created in :ref:`3 `. + Configure a YAML file of the PVC in the CSI format according to the PVC in the FlexVolume format and associate the PVC with the PV created in :ref:`2 `. To be specific, run the following commands to configure the pvc-example.yaml file, which is used to create a PVC. @@ -244,9 +242,9 @@ Procedure metadata: labels: failure-domain.beta.kubernetes.io/region: eu-de - failure-domain.beta.kubernetes.io/zone: eu-de-01 + failure-domain.beta.kubernetes.io/zone: annotations: - everest.io/disk-volume-type: SATA + everest.io/disk-volume-type: SAS volume.beta.kubernetes.io/storage-provisioner: everest-csi-provisioner name: pvc-evs-example namespace: default @@ -270,7 +268,7 @@ Procedure +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | failure-domain.beta.kubernetes.io/zone | AZ where the EVS disk is deployed. Use the same value as that of the FlexVolume PVC. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | everest.io/disk-volume-type | Storage class of the EVS disk. The value can be **SAS**, **SSD**, or **SATA**. Set this parameter to the same value as that of the PV created in :ref:`3 `. | + | everest.io/disk-volume-type | Storage class of the EVS disk. The value can be **SAS** or **SSD**. Set this parameter to the same value as that of the PV created in :ref:`2 `. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | name | PVC name, which must be unique in the namespace. The value must be unique in the namespace. (If the PVC is dynamically created by a stateful application, the value of this parameter must be the same as the name of the FlexVolume PVC.) 
| +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -278,7 +276,7 @@ Procedure +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storage | Requested capacity of the PVC, which must be the same as the storage size of the existing PV. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeName | Name of the PV. Set this parameter to the name of the static PV in :ref:`3 `. | + | volumeName | Name of the PV. Set this parameter to the name of the static PV in :ref:`2 `. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storageClassName | Name of the Kubernetes storage class. Set this field to **csi-disk** for EVS disks. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -318,7 +316,7 @@ Procedure +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storageClassName | Set this field to **csi-nas**. | +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeName | Name of the PV. Set this parameter to the name of the static PV in :ref:`3 `. | + | volumeName | Name of the PV. Set this parameter to the name of the static PV in :ref:`2 `. 
| +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Configuration example of **a PVC for an OBS volume**: @@ -350,7 +348,7 @@ Procedure +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Parameter | Description | +============================+============================================================================================================================================================================================================================================+ - | everest.io/obs-volume-type | OBS volume type, which can be **STANDARD** (standard bucket) and **WARM** (infrequent access bucket). Set this parameter to the same value as that of the PV created in :ref:`3 `. | + | everest.io/obs-volume-type | OBS volume type, which can be **STANDARD** (standard bucket) and **WARM** (infrequent access bucket). Set this parameter to the same value as that of the PV created in :ref:`2 `. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | csi.storage.k8s.io/fstype | File type, which can be **obsfs** or **s3fs**. The value must be the same as that of **fsType** of the static OBS volume PV. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -358,11 +356,11 @@ Procedure +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | namespace | Namespace to which the PVC belongs. Use the same value as that of the FlexVolume PVC. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | storage | Storage capacity in the unit of Gi. Set this parameter to the fixed value **1Gi**. | + | storage | Storage capacity, in the unit of Gi. Set this parameter to the fixed value **1Gi**. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storageClassName | Name of the Kubernetes storage class. Set this field to **csi-obs**. 
| +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeName | Name of the PV. Set this parameter to the name of the static PV created in :ref:`3 `. | + | volumeName | Name of the PV. Set this parameter to the name of the static PV created in :ref:`2 `. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Configuration example of **a PVC for an SFS Turbo volume**: @@ -400,7 +398,7 @@ Procedure +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storage | Storage capacity, in the unit of Gi. The value must be the same as the storage size of the existing PV. | +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeName | Name of the PV. Set this parameter to the name of the static PV created in :ref:`3 `. | + | volumeName | Name of the PV. Set this parameter to the name of the static PV created in :ref:`2 `. | +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ #. .. _cce_bestpractice_0107__li487255772614: @@ -417,7 +415,7 @@ Procedure .. note:: - Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`3 ` and :ref:`4 `. + Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. b. Go to the CCE console. On the workload upgrade page, click **Upgrade** > **Advanced Settings** > **Data Storage** > **Cloud Storage**. @@ -437,7 +435,7 @@ Procedure .. note:: - Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`3 ` and :ref:`4 `. + Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. b. Run the **kubectl edit** command to edit the StatefulSet and use the newly created PVC. @@ -475,7 +473,7 @@ Procedure .. note:: - Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`3 ` and :ref:`4 `. + Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. e. Change the number of pods back to the original value and wait until the pods are running. 
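The following is a minimal sketch of the kubectl commands that the preceding steps describe. The file names **pv-example.yaml** and **pvc-example.yaml**, the workload name **statefulset-example**, and the replica count **3** are placeholders only; replace them with the names and values actually used in your cluster.

.. code-block::

   # Create the CSI PV and PVC configured in the preceding steps.
   kubectl create -f pv-example.yaml
   kubectl create -f pvc-example.yaml

   # Edit the workload so that it mounts the newly created PVC, then restore the original replica count.
   kubectl edit statefulset statefulset-example -n default
   kubectl scale statefulset statefulset-example -n default --replicas=3

   # Wait until all pods are running again.
   kubectl get pod -n default -w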
@@ -511,7 +509,7 @@ Procedure namespace: default creationTimestamp: null annotations: - everest.io/disk-volume-type: SATA + everest.io/disk-volume-type: SAS spec: accessModes: - ReadWriteOnce @@ -520,7 +518,7 @@ Procedure storage: 10Gi storageClassName: csi-disk - The parameter value must be the same as the PVC of the EVS volume created in :ref:`4 `. + The parameter value must be the same as the PVC of the EVS volume created in :ref:`3 `. Configuration example of **volumeClaimTemplates for an SFS volume**: @@ -537,9 +535,9 @@ Procedure resources: requests: storage: 10Gi - storageClassName: csi-na + storageClassName: csi-nas - The parameter value must be the same as the PVC of the SFS volume created in :ref:`4 `. + The parameter value must be the same as the PVC of the SFS volume created in :ref:`3 `. Configuration example of **volumeClaimTemplates for an OBS volume**: @@ -561,7 +559,7 @@ Procedure storage: 1Gi storageClassName: csi-obs - The parameter value must be the same as the PVC of the OBS volume created in :ref:`4 `. + The parameter value must be the same as the PVC of the OBS volume created in :ref:`3 `. - Delete the StatefulSet. @@ -578,7 +576,7 @@ Procedure .. note:: - If a rollback is required, perform :ref:`5 `. Select the PVC in FlexVolume format and upgrade the application. + If a rollback is required, perform :ref:`4 `. Select the PVC in FlexVolume format and upgrade the application. #. Uninstall the PVC in the FlexVolume format. @@ -588,6 +586,10 @@ Procedure .. caution:: - Before deleting a PV, change the persistentVolumeReclaimPolicy policy of the PV to **Retain**. Otherwise, the underlying storage will be reclaimed after the PV is deleted. + Before deleting a PV, change the persistentVolumeReclaimPolicy of the PV to **Retain**. Otherwise, the underlying storage will be reclaimed after the PV is deleted. -.. |image1| image:: /_static/images/en-us_image_0000001178352604.png + If the cluster has been upgraded before the storage migration, PVs may fail to be deleted. You can remove the PV protection field **finalizers** to delete PVs. + + kubectl patch pv {pv_name} -p '{"metadata":{"finalizers":null}}' + +.. |image1| image:: /_static/images/en-us_image_0000001097062729.png diff --git a/umn/source/best_practice/storage/index.rst b/umn/source/best_practice/storage/index.rst new file mode 100644 index 0000000..1c955f5 --- /dev/null +++ b/umn/source/best_practice/storage/index.rst @@ -0,0 +1,24 @@ +:original_name: cce_bestpractice_0053.html + +.. _cce_bestpractice_0053: + +Storage +======= + +- :ref:`Expanding Node Disk Capacity ` +- :ref:`Mounting an Object Storage Bucket of a Third-Party Tenant ` +- :ref:`Dynamically Creating and Mounting Subdirectories of an SFS Turbo File System ` +- :ref:`How Do I Change the Storage Class Used by a Cluster of v1.15 from FlexVolume to CSI Everest? ` +- :ref:`Custom Storage Classes ` +- :ref:`Realizing Automatic Topology for EVS Disks When Nodes Are Deployed Across AZs (csi-disk-topology) ` + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + expanding_node_disk_capacity + mounting_an_object_storage_bucket_of_a_third-party_tenant + dynamically_creating_and_mounting_subdirectories_of_an_sfs_turbo_file_system + how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest + custom_storage_classes + realizing_automatic_topology_for_evs_disks_when_nodes_are_deployed_across_azs_csi-disk-topology diff --git a/umn/source/best_practice/storage/mounting_an_object_storage_bucket_of_a_third-party_tenant.rst b/umn/source/best_practice/storage/mounting_an_object_storage_bucket_of_a_third-party_tenant.rst new file mode 100644 index 0000000..9c6ce2a --- /dev/null +++ b/umn/source/best_practice/storage/mounting_an_object_storage_bucket_of_a_third-party_tenant.rst @@ -0,0 +1,195 @@ +:original_name: cce_bestpractice_00199.html + +.. _cce_bestpractice_00199: + +Mounting an Object Storage Bucket of a Third-Party Tenant +========================================================= + +This section describes how to mount OBS buckets and OBS parallel file systems (preferred) of third-party tenants. + +Scenario +-------- + +The CCE cluster of a SaaS service provider needs to be mounted with the OBS bucket of a third-party tenant, as shown in :ref:`Figure 1 `. + +.. _cce_bestpractice_00199__fig1315433183918: + +.. figure:: /_static/images/en-us_image_0268523694.png + :alt: **Figure 1** Mounting an OBS bucket of a third-party tenant + + **Figure 1** Mounting an OBS bucket of a third-party tenant + +#. :ref:`The third-party tenant authorizes the SaaS service provider to access the OBS buckets or parallel file systems ` by setting the bucket policy and bucket ACL. +#. :ref:`The SaaS service provider statically imports the OBS buckets and parallel file systems of the third-party tenant `. +#. The SaaS service provider processes the service and writes the processing result (result file or result data) back to the OBS bucket of the third-party tenant. + +Precautions +----------- + +- Only parallel file systems and OBS buckets of third-party tenants in the same region can be mounted. +- Only clusters where the everest add-on of v1.1.11 or later has been installed (the cluster version must be v1.15 or later) can be mounted with OBS buckets of third-party tenants. +- The service platform of the SaaS service provider needs to manage the lifecycle of the third-party bucket PVs. When a PVC is deleted separately, the PV is not deleted. Instead, it will be retained. To do so, you need to call the native Kubernetes APIs to create and delete static PVs. + +.. _cce_bestpractice_00199__section193471249193310: + +Authorizing the SaaS Service Provider to Access the OBS Buckets +--------------------------------------------------------------- + +The following uses an OBS bucket as an example to describe how to set a bucket policy and bucket ACL to authorize the SaaS service provider. The configuration for an OBS parallel file system is the same. + +#. Log in to the OBS console. In the navigation pane, choose **Buckets**. +#. In the bucket list, click a bucket name to access the **Overview** page. + +3. In the navigation pane, choose **Permissions** > **Bucket Policy**. On the displayed page, click **Create** to create a bucket policy. + + Set the parameters as shown in the following figure. + + + .. figure:: /_static/images/en-us_image_0000001325377749.png + :alt: **Figure 2** Creating a bucket policy + + **Figure 2** Creating a bucket policy + + - **Policy Mode**: Select **Customized**. 
+ - **Effect**: Select **Allow**. + - **Principal**: Select **Other account**, and enter the account ID and user ID. The bucket policy takes effect for the specified users. + - **Resources**: Select the resources that can be operated. + - **Actions**: Select the actions that can be operated. + +4. In the navigation pane, choose **Permissions** > **Bucket ACLs**. In the right pane, click **Add**.Enter the account ID or account name of the authorized user, select **Read** and **Write** for **Access to Bucket**, select **Read** and **Write** for **Access to ACL**, and click **OK**. + +.. _cce_bestpractice_00199__en-us_topic_0196817407_section155006183017: + +Statically Importing OBS Buckets and Parallel File Systems +---------------------------------------------------------- + +- **Static PV of an OBS bucket:** + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolume + metadata: + name: objbucket #Replace the name with the actual PV name of the bucket. + annotations: + pv.kubernetes.io/provisioned-by: everest-csi-provisioner + spec: + accessModes: + - ReadWriteMany + capacity: + storage: 1Gi + mountOptions: + - default_acl=bucket-owner-full-control #New OBS mounting parameters + csi: + driver: obs.csi.everest.io + fsType: obsfs + volumeAttributes: + everest.io/obs-volume-type: STANDARD + everest.io/region: eu-de #Set it to the ID of the current region. + storage.kubernetes.io/csiProvisionerIdentity: everest-csi-provisioner + volumeHandle: objbucket #Replace the name with the actual bucket name of the third-party tenant. + persistentVolumeReclaimPolicy: Retain #This parameter must be set to Retain to ensure that the bucket will not be deleted when a PV is deleted. + storageClassName: csi-obs-mountoption #You can associate a new custom OBS storage class or the built-in csi-obs of the cluster. + + - **mountOptions**: This field contains the new OBS mounting parameters that allow the bucket owner to have full access to the data in the bucket. This field solves the problem that the bucket owner cannot read the data written into a mounted third-party bucket. If the object storage of a third-party tenant is mounted, **default_acl** must be set to **bucket-owner-full-control**. For details about other values of **default_acl**, see `Bucket ACLs and Object ACLs `__. + - **persistentVolumeReclaimPolicy**: When the object storage of a third-party tenant is mounted, this field must be set to **Retain**. In this way, the OBS bucket will not be deleted when a PV is deleted. The service platform of the SaaS service provider needs to manage the lifecycle of the third-party bucket PVs. When a PVC is deleted separately, the PV is not deleted. Instead, it will be retained. To do so, you need to call the native Kubernetes APIs to create and delete static PVs. + - **storageClassName**: You can associate a new custom OBS storage class (:ref:`click here `) or the built-in csi-obs of the cluster. + + **PVC of a bound OBS bucket:** + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + annotations: + csi.storage.k8s.io/fstype: obsfs + everest.io/obs-volume-type: STANDARD + volume.beta.kubernetes.io/storage-provisioner: everest-csi-provisioner + name: objbucketpvc #Replace the name with the actual PVC name of the bucket. + namespace: default + spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 1Gi + storageClassName: csi-obs-mountoption #The value must be the same as the storage class associated with the bound PV. 
+ volumeName: objbucket #Replace the name with the actual PV name of the bucket to be bound. + +- **Static PV of an OBS parallel file system:** + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolume + metadata: + name: obsfscheck #Replace the name with the actual PV name of the parallel file system. + annotations: + pv.kubernetes.io/provisioned-by: everest-csi-provisioner + spec: + accessModes: + - ReadWriteMany + capacity: + storage: 1Gi + mountOptions: + - default_acl=bucket-owner-full-control #New OBS mounting parameters + csi: + driver: obs.csi.everest.io + fsType: obsfs + volumeAttributes: + everest.io/obs-volume-type: STANDARD + everest.io/region: eu-de + storage.kubernetes.io/csiProvisionerIdentity: everest-csi-provisioner + volumeHandle: obsfscheck #Replace the name with the actual name of the parallel file system of the third-party tenant. + persistentVolumeReclaimPolicy: Retain #This parameter must be set to Retain to ensure that the bucket will not be deleted when a PV is deleted. + storageClassName: csi-obs-mountoption #You can associate a new custom OBS storage class or the built-in csi-obs of the cluster. + + - **mountOptions**: This field contains the new OBS mounting parameters that allow the bucket owner to have full access to the data in the bucket. This field solves the problem that the bucket owner cannot read the data written into a mounted third-party bucket. If the object storage of a third-party tenant is mounted, **default_acl** must be set to **bucket-owner-full-control**. For details about other values of **default_acl**, see `Bucket ACLs and Object ACLs `__. + - **persistentVolumeReclaimPolicy**: When the object storage of a third-party tenant is mounted, this field must be set to **Retain**. In this way, the OBS bucket will not be deleted when a PV is deleted. The service platform of the SaaS service provider needs to manage the lifecycle of the third-party bucket PVs. When a PVC is deleted separately, the PV is not deleted. Instead, it will be retained. To do so, you need to call the native Kubernetes APIs to create and delete static PVs. + - **storageClassName**: You can associate a new custom OBS storage class (:ref:`click here `) or the built-in csi-obs of the cluster. + + PVC of a bound OBS parallel file system: + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + annotations: + csi.storage.k8s.io/fstype: obsfs + everest.io/obs-volume-type: STANDARD + volume.beta.kubernetes.io/storage-provisioner: everest-csi-provisioner + name: obsfscheckpvc #Replace the name with the actual PVC name of the parallel file system. + namespace: default + spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 1Gi + storageClassName: csi-obs-mountoption #The value must be the same as the storage class associated with the bound PV. + volumeName: obsfscheck #Replace the name with the actual PV name of the parallel file system. + +- .. _cce_bestpractice_00199__li1235812419467: + + **(Optional) Creating a custom OBS storage class to associate with a static PV:** + + .. code-block:: + + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: csi-obs-mountoption + mountOptions: + - default_acl=bucket-owner-full-control + parameters: + csi.storage.k8s.io/csi-driver-name: obs.csi.everest.io + csi.storage.k8s.io/fstype: obsfs + everest.io/obs-volume-type: STANDARD + provisioner: everest-csi-provisioner + reclaimPolicy: Retain + volumeBindingMode: Immediate + + - **csi.storage.k8s.io/fstype**: File type. 
The value can be **obsfs** or **s3fs**. If the value is **s3fs**, an OBS bucket is created and mounted using s3fs. If the value is **obsfs**, an OBS parallel file system is created and mounted using obsfs. + - **reclaimPolicy**: Reclaim policy of a PV. The value will be set in **PV.spec.persistentVolumeReclaimPolicy** dynamically created based on the new PVC associated with the storage class. If the value is **Delete**, the external OBS bucket and the PV will be deleted when the PVC is deleted. If the value is **Retain**, the PV and external storage are retained when the PVC is deleted. In this case, you need to clear the PV separately. In the scenario where an imported third-party bucket is associated, the storage class is used only for associating static PVs (with this field set to **Retain**). Dynamic creation is not involved. diff --git a/umn/source/best_practice/storage/realizing_automatic_topology_for_evs_disks_when_nodes_are_deployed_across_azs_csi-disk-topology.rst b/umn/source/best_practice/storage/realizing_automatic_topology_for_evs_disks_when_nodes_are_deployed_across_azs_csi-disk-topology.rst new file mode 100644 index 0000000..43bf91f --- /dev/null +++ b/umn/source/best_practice/storage/realizing_automatic_topology_for_evs_disks_when_nodes_are_deployed_across_azs_csi-disk-topology.rst @@ -0,0 +1,324 @@ +:original_name: cce_bestpractice_00284.html + +.. _cce_bestpractice_00284: + +Realizing Automatic Topology for EVS Disks When Nodes Are Deployed Across AZs (csi-disk-topology) +================================================================================================= + +Challenges +---------- + +EVS disks cannot be attached across AZs. For example, EVS disks in AZ 1 cannot be attached to nodes in AZ 2. + +If the storage class csi-disk is used for StatefulSets, when a StatefulSet is scheduled, a PVC and a PV are created immediately (an EVS disk is created along with the PV), and then the PVC is bound to the PV. + +However, when the cluster nodes are located in multiple AZs, the EVS disk created by the PVC and the node to which the pods are scheduled may be in different AZs. As a result, the pods fail to be scheduled. + +|image1| + +Solution +-------- + +CCE provides a storage class named **csi-disk-topology**. When you use this storage class to create a PVC, no PV will be created in pace with the PVC. Instead, the PV is created in the AZ of the node where the pod will be scheduled. An EVS disk is then created in the same AZ to ensure that the EVS disk can be attached and the pod can be successfully scheduled. + +csi-disk-topology postpones the binding between a PVC and a PV for a while. + +|image2| + +Failed Pod Scheduling Due to csi-disk Used in Cross-AZ Node Deployment +---------------------------------------------------------------------- + +Create a cluster with three nodes in different AZs. + +Use the csi-disk storage class to create a StatefulSet and check whether the workload is successfully created. + +.. code-block:: + + apiVersion: apps/v1 + kind: StatefulSet + metadata: + name: nginx + spec: + serviceName: nginx # Name of the headless Service + replicas: 4 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: container-0 + image: nginx:alpine + resources: + limits: + cpu: 600m + memory: 200Mi + requests: + cpu: 600m + memory: 200Mi + volumeMounts: # Storage mounted to the pod + - name: data + mountPath: /usr/share/nginx/html # Mount the storage to /usr/share/nginx/html. 
+ imagePullSecrets: + - name: default-secret + volumeClaimTemplates: + - metadata: + name: data + annotations: + everest.io/disk-volume-type: SAS + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: csi-disk + +The StatefulSet uses the following headless Service. + +.. code-block:: + + apiVersion: v1 + kind: Service # Object type (Service) + metadata: + name: nginx + labels: + app: nginx + spec: + ports: + - name: nginx # Name of the port for communication between pods + port: 80 # Port number for communication between pods + selector: + app: nginx # Select the pod whose label is app:nginx. + clusterIP: None # Set this parameter to None, indicating the headless Service. + +After the creation, check the PVC and pod status. In the following output, the PVC has been created and bound successfully, and a pod is in the Pending state. + +.. code-block:: + + # kubectl get pvc -owide + NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE VOLUMEMODE + data-nginx-0 Bound pvc-04e25985-fc93-4254-92a1-1085ce19d31e 1Gi RWO csi-disk 64s Filesystem + data-nginx-1 Bound pvc-0ae6336b-a2ea-4ddc-8f63-cfc5f9efe189 1Gi RWO csi-disk 47s Filesystem + data-nginx-2 Bound pvc-aa46f452-cc5b-4dbd-825a-da68c858720d 1Gi RWO csi-disk 30s Filesystem + data-nginx-3 Bound pvc-3d60e532-ff31-42df-9e78-015cacb18a0b 1Gi RWO csi-disk 14s Filesystem + + # kubectl get pod -owide + NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES + nginx-0 1/1 Running 0 2m25s 172.16.0.12 192.168.0.121 + nginx-1 1/1 Running 0 2m8s 172.16.0.136 192.168.0.211 + nginx-2 1/1 Running 0 111s 172.16.1.7 192.168.0.240 + nginx-3 0/1 Pending 0 95s + +The event information of the pod shows that the scheduling fails due to no available node. Two nodes (in AZ 1 and AZ 2) do not have sufficient CPUs, and the created EVS disk is not in the AZ where the third node (in AZ 3) is located. As a result, the pod cannot use the EVS disk. + +.. code-block:: + + # kubectl describe pod nginx-3 + Name: nginx-3 + ... + Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Warning FailedScheduling 111s default-scheduler 0/3 nodes are available: 3 pod has unbound immediate PersistentVolumeClaims. + Warning FailedScheduling 111s default-scheduler 0/3 nodes are available: 3 pod has unbound immediate PersistentVolumeClaims. + Warning FailedScheduling 28s default-scheduler 0/3 nodes are available: 1 node(s) had volume node affinity conflict, 2 Insufficient cpu. + +Check the AZ where the EVS disk created from the PVC is located. It is found that data-nginx-3 is in AZ 1. In this case, the node in AZ 1 has no resources, and only the node in AZ 3 has CPU resources. As a result, the scheduling fails. Therefore, there should be a delay between creating the PVC and binding the PV. + +Storage Class for Delayed Binding +--------------------------------- + +If you check the cluster storage class, you can see that the binding mode of csi-disk-topology is **WaitForFirstConsumer**, indicating that a PV is created and bound when a pod uses the PVC. That is, the PV and the underlying storage resources are created based on the pod information. + +.. 
code-block:: + + # kubectl get storageclass + NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE + csi-disk everest-csi-provisioner Delete Immediate true 156m + csi-disk-topology everest-csi-provisioner Delete WaitForFirstConsumer true 156m + csi-nas everest-csi-provisioner Delete Immediate true 156m + csi-obs everest-csi-provisioner Delete Immediate false 156m + +**VOLUMEBINDINGMODE** is displayed if your cluster is v1.19. It is not displayed in clusters of v1.17 or v1.15. + +You can also view the binding mode in the csi-disk-topology details. + +.. code-block:: + + # kubectl describe sc csi-disk-topology + Name: csi-disk-topology + IsDefaultClass: No + Annotations: + Provisioner: everest-csi-provisioner + Parameters: csi.storage.k8s.io/csi-driver-name=disk.csi.everest.io,csi.storage.k8s.io/fstype=ext4,everest.io/disk-volume-type=SAS,everest.io/passthrough=true + AllowVolumeExpansion: True + MountOptions: + ReclaimPolicy: Delete + VolumeBindingMode: WaitForFirstConsumer + Events: + +Create PVCs of the csi-disk and csi-disk-topology classes. Observe the differences between these two types of PVCs. + +- csi-disk + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: disk + annotations: + everest.io/disk-volume-type: SAS + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: csi-disk # StorageClass + +- csi-disk-topology + + .. code-block:: + + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: topology + annotations: + everest.io/disk-volume-type: SAS + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: csi-disk-topology # StorageClass + +View the PVC details. As shown below, the csi-disk PVC is in Bound state and the csi-disk-topology PVC is in Pending state. + +.. code-block:: + + # kubectl create -f pvc1.yaml + persistentvolumeclaim/disk created + # kubectl create -f pvc2.yaml + persistentvolumeclaim/topology created + # kubectl get pvc + NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE + disk Bound pvc-88d96508-d246-422e-91f0-8caf414001fc 10Gi RWO csi-disk 18s + topology Pending csi-disk-topology 2s + +View details about the csi-disk-topology PVC. You can see that "waiting for first consumer to be created before binding" is displayed in the event, indicating that the PVC is bound after the consumer (pod) is created. + +.. code-block:: + + # kubectl describe pvc topology + Name: topology + Namespace: default + StorageClass: csi-disk-topology + Status: Pending + Volume: + Labels: + Annotations: everest.io/disk-volume-type: SAS + Finalizers: [kubernetes.io/pvc-protection] + Capacity: + Access Modes: + VolumeMode: Filesystem + Used By: + Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal WaitForFirstConsumer 5s (x3 over 30s) persistentvolume-controller waiting for first consumer to be created before binding + +Create a workload that uses the PVC. Set the PVC name to **topology**. + +.. code-block:: + + apiVersion: apps/v1 + kind: Deployment + metadata: + name: nginx-deployment + spec: + selector: + matchLabels: + app: nginx + replicas: 1 + template: + metadata: + labels: + app: nginx + spec: + containers: + - image: nginx:alpine + name: container-0 + volumeMounts: + - mountPath: /tmp # Mount path + name: topology-example + restartPolicy: Always + volumes: + - name: topology-example + persistentVolumeClaim: + claimName: topology # PVC name + +After the PVC is created, check the PVC details. 
You can see that the PVC is bound successfully. + +.. code-block:: + + # kubectl describe pvc topology + Name: topology + Namespace: default + StorageClass: csi-disk-topology + Status: Bound + .... + Used By: nginx-deployment-fcd9fd98b-x6tbs + Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal WaitForFirstConsumer 84s (x26 over 7m34s) persistentvolume-controller waiting for first consumer to be created before binding + Normal Provisioning 54s everest-csi-provisioner_everest-csi-controller-7965dc48c4-5k799_2a6b513e-f01f-4e77-af21-6d7f8d4dbc98 External provisioner is provisioning volume for claim "default/topology" + Normal ProvisioningSucceeded 52s everest-csi-provisioner_everest-csi-controller-7965dc48c4-5k799_2a6b513e-f01f-4e77-af21-6d7f8d4dbc98 Successfully provisioned volume pvc-9a89ea12-4708-4c71-8ec5-97981da032c9 + +Using csi-disk-topology in Cross-AZ Node Deployment +--------------------------------------------------- + +The following uses csi-disk-topology to create a StatefulSet with the same configurations used in the preceding example. + +.. code-block:: + + volumeClaimTemplates: + - metadata: + name: data + annotations: + everest.io/disk-volume-type: SAS + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: csi-disk-topology + +After the creation, check the PVC and pod status. As shown in the following output, the PVC and pod can be created successfully. The nginx-3 pod is created on the node in AZ 3. + +.. code-block:: + + # kubectl get pvc -owide + NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE VOLUMEMODE + data-nginx-0 Bound pvc-43802cec-cf78-4876-bcca-e041618f2470 1Gi RWO csi-disk-topology 55s Filesystem + data-nginx-1 Bound pvc-fc942a73-45d3-476b-95d4-1eb94bf19f1f 1Gi RWO csi-disk-topology 39s Filesystem + data-nginx-2 Bound pvc-d219f4b7-e7cb-4832-a3ae-01ad689e364e 1Gi RWO csi-disk-topology 22s Filesystem + data-nginx-3 Bound pvc-b54a61e1-1c0f-42b1-9951-410ebd326a4d 1Gi RWO csi-disk-topology 9s Filesystem + + # kubectl get pod -owide + NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES + nginx-0 1/1 Running 0 65s 172.16.1.8 192.168.0.240 + nginx-1 1/1 Running 0 49s 172.16.0.13 192.168.0.121 + nginx-2 1/1 Running 0 32s 172.16.0.137 192.168.0.211 + nginx-3 1/1 Running 0 19s 172.16.1.9 192.168.0.240 + +.. |image1| image:: /_static/images/en-us_image_0000001113962636.png +.. |image2| image:: /_static/images/en-us_image_0000001160642447.png diff --git a/umn/source/change_history.rst b/umn/source/change_history.rst index 07d2267..3c12c3e 100644 --- a/umn/source/change_history.rst +++ b/umn/source/change_history.rst @@ -10,6 +10,12 @@ Change History +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Released On | What's New | +===================================+=======================================================================================================================================================================================================================================+ + | 2022-12-20 | - Updated :ref:`OS Patch Notes for Cluster Nodes `. | + | | - Added :ref:`volcano `. | + | | - Added :ref:`Service Account Token Security Improvement `. 
| + +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | 2022-11-21 | Added :ref:`Best Practice `. | + +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | 2022-08-27 | EulerOS 2.9 is supported. For details, see :ref:`OS Patch Notes for Cluster Nodes `. | +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | 2022-07-13 | Supported egress rules. For details, see :ref:`Network Policies `. | @@ -154,7 +160,6 @@ Change History +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | 2018-09-15 | - Added a step to the procedure of adding a node to a BMS cluster. For details, see 3.4-Adding Existing Nodes to a BMS Cluster. | | | - Deleted the EVS and ELB related constraints. For details, see 3.4-Constraints. | - | | - Added the description of DeH in 3.7-Table Parameters for creating a node. | +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | 2018-09-05 | - Only manual scaling is supported by stateful applications. For details, see 4.10-Manual Scaling. | | | - Added the procedure for creating BMS clusters. | diff --git a/umn/source/clusters/cce_turbo_clusters_and_cce_clusters.rst b/umn/source/clusters/cce_turbo_clusters_and_cce_clusters.rst index 4519afe..046795e 100644 --- a/umn/source/clusters/cce_turbo_clusters_and_cce_clusters.rst +++ b/umn/source/clusters/cce_turbo_clusters_and_cce_clusters.rst @@ -12,26 +12,26 @@ The following table lists the differences between CCE Turbo clusters and CCE clu .. 
table:: **Table 1** Cluster types - +-----------------+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | Dimensions | Sub-dimension | CCE Turbo Cluster | CCE Cluster | - +=================+=============================+==================================================================================================================================+======================================================================================================================================+ - | Cluster | Positioning | Next-generation container cluster for Cloud Native 2.0 with accelerated computing, networking, and scheduling | Standard cluster for common commercial use | - +-----------------+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | | Node type | Hybrid deployment of VMs and bare-metal servers | Hybrid deployment of VMs | - +-----------------+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | Network | Network model | **Cloud Native Network 2.0**: applies to large-scale and high-performance scenarios. | **Cloud-native network 1.0** for scenarios that do not require high performance or involve large-scale deployment. | - | | | | | - | | | Networking scale: 2000 nodes | - Tunnel network model | - | | | | - VPC network model | - +-----------------+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | | Network performance | The VPC network and container network are flattened into one, achieving zero performance loss. | The VPC network is overlaid with the container network, causing certain performance loss. | - +-----------------+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | | Container network isolation | Pods can be directly associated with security groups to configure isolation policies for resources inside and outside a cluster. | - Tunnel network model: Network isolation policies are supported for intra-cluster communication (by configuring network policies). | - | | | | - VPC network model: Isolation is not supported. 
| - +-----------------+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | Security | Isolation | - Bare-metal server: You can select secure containers for VM-level isolation. | Common containers are deployed and isolated by Cgroups. | - | | | - VM: Common containers are deployed. | | - +-----------------+-----------------------------+----------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ + +-----------------+-----------------------------+--------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ + | Dimension | Sub-dimension | CCE Turbo Cluster | CCE Cluster | + +=================+=============================+================================================================================================================================+========================================================================================+ + | Cluster | Positioning | Next-gen container cluster, with accelerated computing, networking, and scheduling. Designed for Cloud Native 2.0 | Standard cluster for common commercial use | + +-----------------+-----------------------------+--------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ + | | Node type | Hybrid deployment of VMs and bare-metal servers | Hybrid deployment of VMs | + +-----------------+-----------------------------+--------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ + | Networking | Model | **Cloud Native Network 2.0**: applies to large-scale and high-performance scenarios. | **Cloud-native network 1.0**: applies to common, smaller-scale scenarios. | + | | | | | + | | | Max networking scale: 2,000 nodes | - Tunnel network model | + | | | | - VPC network model | + +-----------------+-----------------------------+--------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ + | | Performance | Flattens the VPC network and container network into one. No performance loss. | Overlays the VPC network with the container network, causing certain performance loss. | + +-----------------+-----------------------------+--------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ + | | Container network isolation | Associates pods with security groups. Unifies security isolation in and out the cluster via security groups' network policies. 
| - Tunnel network model: supports network policies for intra-cluster communications. | + | | | | - VPC network model: supports no isolation. | + +-----------------+-----------------------------+--------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ + | Security | Isolation | - Physical machine: runs Kata containers, allowing VM-level isolation. | Runs common containers, isolated by cgroups. | + | | | - VM: runs common containers. | | + +-----------------+-----------------------------+--------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ QingTian Architecture --------------------- diff --git a/umn/source/clusters/cluster_overview.rst b/umn/source/clusters/cluster_overview.rst index f742ea5..c74abec 100644 --- a/umn/source/clusters/cluster_overview.rst +++ b/umn/source/clusters/cluster_overview.rst @@ -14,7 +14,7 @@ When using Kubernetes, it is like you run a large number of servers as one on wh Kubernetes Cluster Architecture ------------------------------- -A Kubernetes cluster consists of master nodes (masters) and worker nodes (nodes). Applications are deployed on worker nodes, and you can specify the nodes for deployment. +A Kubernetes cluster consists of master nodes (Masters) and worker nodes (Nodes). Applications are deployed on worker nodes, and you can specify the nodes for deployment. The following figure shows the architecture of a Kubernetes cluster. @@ -33,7 +33,7 @@ A master node is the machine where the control plane components run, including A - Scheduler: schedules containers to nodes based on various conditions (such as available resources and node affinity). - etcd: serves as a distributed data storage component that stores cluster configuration information. -In the production environment, multiple master nodes are deployed to ensure cluster high availability. For example, you can deploy three master nodes for your CCE cluster. +In a production environment, multiple master nodes are deployed to ensure high cluster availability. For example, you can deploy three master nodes for your CCE cluster. **Worker node** diff --git a/umn/source/clusters/creating_a_cce_cluster.rst b/umn/source/clusters/creating_a_cce_cluster.rst index 337f097..4908ab5 100644 --- a/umn/source/clusters/creating_a_cce_cluster.rst +++ b/umn/source/clusters/creating_a_cce_cluster.rst @@ -192,13 +192,7 @@ Procedure - **Node Name**: Enter a node name. A node name contains 1 to 56 characters starting with a lowercase letter and not ending with a hyphen (-). Only lowercase letters, digits, and hyphens (-) are allowed. - - **Specifications**: Select node specifications that best fit your business needs. - - - **General-purpose**: provides a balance of computing, memory, and network resources. It is a good choice for many applications, such as web servers, workload development, workload testing, and small-scale databases. - - **Memory-optimized**: provides higher memory capacity than general-purpose nodes and is suitable for relational databases, NoSQL, and other workloads that are both memory-intensive and data-intensive. - - **GPU-accelerated**: provides powerful floating-point computing and is suitable for real-time, highly concurrent massive computing. 
Graphical processing units (GPUs) of P series are suitable for deep learning, scientific computing, and CAE. GPUs of G series are suitable for 3D animation rendering and CAD. **GPU-accelerated nodes can be created only in clusters of v1.11 or later**. GPU-accelerated nodes are available only in certain regions. - - **General computing-plus**: provides stable performance and exclusive resources to enterprise-class workloads with high and stable computing performance. - - **Disk-intensive**: supports :ref:`local disk storage ` and provides high network performance. It is designed for workloads requiring high throughput and data switching, such as big data workloads. + - **Specifications**: Select the node specifications based on service requirements. The available node specifications vary depending on AZs. To ensure node stability, CCE automatically reserves some resources to run necessary system components. For details, see :ref:`Formula for Calculating the Reserved Resources of a Node `. @@ -208,7 +202,7 @@ Procedure - **System Disk**: Set the system disk space of the worker node. The value ranges from 40GB to 1024 GB. The default value is 40GB. - By default, system disks support Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD)High I/O (SAS) and Ultra-high I/O (SSD) EVS disks. + By default, system disks support Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD) EVS disks. **Encryption**: Data disk encryption safeguards your data. Snapshots generated from encrypted disks and disks created using these snapshots automatically inherit the encryption function. **This function is available only in certain regions.** @@ -277,7 +271,9 @@ Procedure - **VPC**: A VPC where the current cluster is located. This parameter cannot be changed and is displayed only for clusters of v1.13.10-r0 or later. - - **Subnet**: A subnet improves network security by providing exclusive network resources that are isolated from other networks. You can select any subnet in the cluster VPC. Cluster nodes can belong to different subnets. + - .. _cce_01_0028__li796613104535: + + **Subnet**: A subnet improves network security by providing exclusive network resources that are isolated from other networks. You can select any subnet in the cluster VPC. Cluster nodes can belong to different subnets. During the node creation, software packages are downloaded from OBS using the domain name. You need to use a private DNS server to resolve the OBS domain name, and configure the subnet where the node resides with a private DNS server address. When you create a subnet, the private DNS server is used by default. If you change the subnet DNS, ensure that the DNS server in use can resolve the OBS domain name. @@ -333,6 +329,10 @@ Procedure - **Subnet IP Address**: Select **Automatically assign IP address** (recommended) or **Manually assigning IP addresses**. + .. note:: + + When you **manually assign IPs**, the master IP is randomly specified. Therefore, it may conflict with the worker node IP. If you prefer the manual operation, you are advised to select a subnet CIDR block different from that of the master node when setting worker node :ref:`subnet `. + - **Advanced Kubernetes Settings**: (Optional) Click |image4| to show advanced cluster settings. - **Max Pods**: maximum number of pods that can be created on a node, including the system's default pods. If the cluster uses the **VPC network model**, the maximum value is determined by the number of IP addresses that can be allocated to containers on each node. 
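If you want to check the maximum number of pods that actually takes effect on existing nodes, you can query the pod capacity each node reports, as sketched below. The command is illustrative only; the custom column names are arbitrary.

.. code-block::

   # List each node together with the pod capacity and allocatable pod count it reports.
   kubectl get nodes -o custom-columns=NAME:.metadata.name,MAX_PODS:.status.capacity.pods,ALLOCATABLE_PODS:.status.allocatable.pods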
diff --git a/umn/source/clusters/upgrading_a_cluster/cce_kubernetes_release_notes.rst b/umn/source/clusters/upgrading_a_cluster/cce_kubernetes_release_notes.rst index 52d6dfc..b9aef10 100644 --- a/umn/source/clusters/upgrading_a_cluster/cce_kubernetes_release_notes.rst +++ b/umn/source/clusters/upgrading_a_cluster/cce_kubernetes_release_notes.rst @@ -5,7 +5,7 @@ CCE Kubernetes Release Notes ============================ -CCE has passed the Certified Kubernetes Conformance Program and is a certified Kubernetes offering. To enable interoperability from one Kubernetes installation to the next, you must upgrade your Kubernetes clusters before the maintenance period ends. +To enable interoperability from one Kubernetes installation to the next, you must upgrade your Kubernetes clusters before the maintenance period ends. After the latest Kubernetes version is released, CCE will provide you the changes in this version. For details, see :ref:`Table 1 `. @@ -16,6 +16,16 @@ After the latest Kubernetes version is released, CCE will provide you the change +-----------------------+-----------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Source Version | Target Version | Description | +=======================+=======================+==============================================================================================================================================================================================================================================================================================================================================+ + | v1.21 | v1.23 | - Changelog from v1.21 to v1.23 | + | | | | + | | | Changelog from v1.22 to v1.23: | + | | | | + | | | https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.23.md | + | | | | + | | | Changelog from v1.21 to v1.22: | + | | | | + | | | https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.22.md | + +-----------------------+-----------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | v1.19 | v1.21 | - Changelog from v1.19 to v1.21 | | | | | | | | Changelog from v1.20 to v1.21: | diff --git a/umn/source/clusters/upgrading_a_cluster/performing_in-place_upgrade_v1.15_and_later.rst b/umn/source/clusters/upgrading_a_cluster/performing_in-place_upgrade_v1.15_and_later.rst index cd9c070..f2ce6c6 100644 --- a/umn/source/clusters/upgrading_a_cluster/performing_in-place_upgrade_v1.15_and_later.rst +++ b/umn/source/clusters/upgrading_a_cluster/performing_in-place_upgrade_v1.15_and_later.rst @@ -57,7 +57,7 @@ This section describes how to upgrade a CCE cluster of v1.15 or later. For other **Figure 2** Determining whether to back up the entire master node -#. Check the version information, last update/upgrade time, available upgrade version, and upgrade history of the current cluster. +#. 
Check the version information, last update/upgrade time, available upgrade version, and upgrade history of the current cluster. You can click **Upgrade** or **Install Patch** to upgrade the cluster or install a patch. The cluster upgrade goes through pre-upgrade check, add-on upgrade/uninstallation, master node upgrade, worker node upgrade, and post-upgrade processing. @@ -67,7 +67,7 @@ This section describes how to upgrade a CCE cluster of v1.15 or later. For other **Figure 3** Cluster upgrade page -#. Click **Upgrade** on the right. Set the upgrade parameters. +#. Set the upgrade parameters. - **Available Versions**: Select v1.19 in this example. - **Cluster Backup**: A manual confirmation is required for backing up the entire master node. The backup process uses the Cloud Backup and Recovery (CBR) service and takes about 20 minutes. If there are many cloud backup tasks at the current site, the backup time may be prolonged. diff --git a/umn/source/index.rst b/umn/source/index.rst index a113ed7..525b336 100644 --- a/umn/source/index.rst +++ b/umn/source/index.rst @@ -26,5 +26,6 @@ Cloud Container Service - User Guide permissions_management/index cloud_trace_service_cts/index reference/index + best_practice/index migrating_data_from_cce_1.0_to_cce_2.0/index change_history diff --git a/umn/source/networking/ingress/using_kubectl_to_create_an_elb_ingress.rst b/umn/source/networking/ingress/using_kubectl_to_create_an_elb_ingress.rst index 9de5872..fd35e85 100644 --- a/umn/source/networking/ingress/using_kubectl_to_create_an_elb_ingress.rst +++ b/umn/source/networking/ingress/using_kubectl_to_create_an_elb_ingress.rst @@ -115,7 +115,7 @@ The following describes how to run the kubectl command to automatically create a +-------------------------------------------+-----------------+-----------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Parameter | Mandatory | Type | Description | +===========================================+=================+=======================+==========================================================================================================================================================================================================================================+ - | kubernetes.io/elb.class | No | String | Select a proper load balancer type. | + | kubernetes.io/elb.class | Yes | String | Select a proper load balancer type. | | | | | | | | | | The value can be: | | | | | | diff --git a/umn/source/networking/network_policies.rst b/umn/source/networking/network_policies.rst index 73885f7..2fc217b 100644 --- a/umn/source/networking/network_policies.rst +++ b/umn/source/networking/network_policies.rst @@ -5,16 +5,22 @@ Network Policies ================ -As the service logic becomes increasingly complex, many applications require network calls between modules. Traditional external firewalls or application-based firewalls cannot meet the requirements. Network policies are urgently needed between modules, service logic layers, or functional teams in a large cluster. +NetworkPolicy is a Kubernetes object used to restrict pod access. In CCE, by setting network policies, you can define ingress rules specifying the addresses to access pods or egress rules specifying the addresses pods can access. 
This is equivalent to setting up a firewall at the application layer to further ensure network security. -CCE has enhanced the Kubernetes-based network policy feature, allowing network isolation in a cluster by configuring network policies. This means that a firewall can be set between pods. +Network policies depend on the networking add-on of the cluster to which the policies apply. -For example, to make a payment system accessible only to specified components for security purposes, you can configure network policies. +By default, if a namespace does not have any policy, pods in the namespace accept traffic from any source and send traffic to any destination. + +Network policy rules are classified into the following types: + +- **namespaceSelector**: selects particular namespaces for which all pods should be allowed as ingress sources or egress destinations. +- **podSelector**: selects particular pods in the same namespace as the network policy which should be allowed as ingress sources or egress destinations. +- **ipBlock**: selects particular IP blocks to allow as ingress sources or egress destinations. (Only egress rules support IP blocks.) Notes and Constraints --------------------- -- Only clusters that use the **tunnel network model** support network policies. +- Only clusters that use the tunnel network model support network policies. - Network isolation is not supported for IPv6 addresses. @@ -28,11 +34,6 @@ Notes and Constraints - If a cluster is upgraded to v1.23 in in-place mode, you cannot use egress rules because the node OS is not upgraded. In this case, reset the node. -Precautions ------------ - -If no network policies have been configured for a workload, such as **workload-1**, other workloads in the same cluster can access **workload-1**. - Using Ingress Rules ------------------- @@ -87,9 +88,9 @@ Using Ingress Rules - protocol: TCP port: 6379 - :ref:`Figure 2 ` shows how namespaceSelector selects ingress sources. + :ref:`Figure 2 ` shows how namespaceSelector selects ingress sources. - .. _cce_01_0059__en-us_topic_0249851123_fig127351855617: + .. _cce_01_0059__en-us_topic_0000001199501178_en-us_topic_0249851123_fig127351855617: .. figure:: /_static/images/en-us_image_0259558489.png :alt: **Figure 2** namespaceSelector @@ -171,46 +172,37 @@ Diagram: **Figure 4** Using both ingress and egress -Adding a Network Policy on the Console --------------------------------------- +Creating a Network Policy on the Console +---------------------------------------- -#. Log in to the CCE console. In the navigation pane, choose **Resource Management** > **Network**. On the **Network Policies** tab page, click **Create Network Policy**. +#. Log in to the CCE console and access the cluster details page. +#. Choose **Networking** in the navigation pane, click the **Network Policies** tab, and click **Create Network Policy** in the upper right corner. - - **Network Policy Name**: Specify a network policy name. - - - **Cluster Name**: Select a cluster to which the network policy belongs. + - **Policy Name**: Specify a network policy name. - **Namespace**: Select a namespace in which the network policy is applied. - - **Workload** + - **Selector**: Enter a label, select the pod to be associated, and click **Add**. You can also click **Reference Workload Label** to reference the label of an existing workload. - Click **Select Workload**. In the dialog box displayed, select a workload for which the network policy is to be created, for example, **workload-1**. Then, click **OK**. 
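For reference, the ingress rule types described above map directly onto the NetworkPolicy API. A minimal manifest combining **podSelector** with a TCP port restriction might look as follows (the policy name **access-demo** and the labels **app: nginx** and **access: true** are hypothetical examples used only for illustration, not values taken from this guide):

.. code-block::

   apiVersion: networking.k8s.io/v1
   kind: NetworkPolicy
   metadata:
     name: access-demo              # hypothetical policy name
     namespace: default
   spec:
     podSelector:                   # pods this policy applies to
       matchLabels:
         app: nginx                 # hypothetical workload label
     ingress:
     - from:
       - podSelector:               # allow traffic only from pods carrying this label
           matchLabels:
             access: "true"
       ports:
       - protocol: TCP
         port: 80

With such a policy in place, pods labeled **app: nginx** accept TCP traffic on port 80 only from pods labeled **access: true** in the same namespace; all other ingress traffic to them is denied.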
+ - **Inbound Rule**: Click |image1| to add an inbound rule. For details about parameter settings, see :ref:`Table 1 `. - - **Rules**: Click **Add Rule**, set the parameters listed in :ref:`Table 1 `, and click **OK**. + |image2| - .. _cce_01_0059__table26919378234: + .. _cce_01_0059__en-us_topic_0000001199501178_table166419994515: .. table:: **Table 1** Parameters for adding a rule - +-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Parameter | Description | - +===================================+================================================================================================================================================================+ - | Direction | Only **Inbound** is supported, indicating that the whitelisted workloads access the current workload (**workload-1** in this example). | - +-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Protocol | Select a protocol. Currently, the TCP and UDP protocols are supported. The ICMP protocol is not supported. | - +-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Destination Container Port | Specify a port on which the workload in the container image listens. The Nginx application listens on port 80. | - | | | - | | If no container port is specified, all ports can be accessed by default. | - +-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Whitelisted Workloads | Select other workloads that can access the current workload. These workloads will access the current workload at the destination container port. | - | | | - | | - **Namespace**: All workloads in the selected namespace(s) are added to the whitelist. That is, all workloads in the namespace(s) can access **workload-1**. | - | | - **Workload**: The selected workloads can access **workload-1**. Only other workloads in the same namespace as **workload-1** can be selected. | - +-----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +------------------+--------------------------------------------------------------------------+ + | Parameter | Description | + +==================+==========================================================================+ + | Protocol & Port | Select the protocol type and port. Currently, TCP and UDP are supported. | + +------------------+--------------------------------------------------------------------------+ + | Source Namespace | Select a namespace whose objects can be accessed. | + +------------------+--------------------------------------------------------------------------+ + | Source Pod Label | Select the pod that allows this label to access. | + +------------------+--------------------------------------------------------------------------+ -#. Click **Create**. +#. Click **OK**. -#. 
Repeat the preceding steps to add more network policies for the current workload when other ports need to be accessed by some workloads. - - After the network policies are created, only the specified workloads or workloads in the specified namespaces can access the current workload. +.. |image1| image:: /_static/images/en-us_image_0000001251716033.png +.. |image2| image:: /_static/images/en-us_image_0000001207036074.png diff --git a/umn/source/node_pools/creating_a_node_pool.rst b/umn/source/node_pools/creating_a_node_pool.rst index eab7a37..3f0ee6a 100644 --- a/umn/source/node_pools/creating_a_node_pool.rst +++ b/umn/source/node_pools/creating_a_node_pool.rst @@ -92,13 +92,7 @@ To create a node pool in a cluster, perform the following steps: In a CCE Turbo cluster, an AZ is randomly selected from available AZs, and all nodes are created in the selected AZ. - - **Specifications**: Select node specifications that best fit your business needs. - - - **General-purpose**: provides a balance of computing, memory, and network resources. It is a good choice for many applications, such as web servers, workload development, workload testing, and small-scale databases. - - **Memory-optimized**: provides higher memory capacity than general-purpose nodes and is suitable for relational databases, NoSQL, and other workloads that are both memory-intensive and data-intensive. - - **GPU-accelerated**: provides powerful floating-point computing and is suitable for real-time, highly concurrent massive computing. Graphical processing units (GPUs) of P series are suitable for deep learning, scientific computing, and CAE. GPUs of G series are suitable for 3D animation rendering and CAD. **GPU-accelerated nodes can be created only in clusters of v1.11 or later**. GPU-accelerated nodes are available only in certain regions. - - **General computing-plus**: provides stable performance and exclusive resources to enterprise-class workloads with high and stable computing performance. - - **Disk-intensive**: supports :ref:`local disk storage ` and provides high network performance. It is designed for workloads requiring high throughput and data switching, such as big data workloads. + - **Specifications**: Select the node specifications based on service requirements. The available node specifications vary depending on AZs. To ensure node stability, CCE automatically reserves some resources to run necessary system components. For details, see :ref:`Formula for Calculating the Reserved Resources of a Node `. @@ -122,7 +116,7 @@ To create a node pool in a cluster, perform the following steps: - **System Disk**: Set the system disk space of the worker node. The value ranges from 40GB to 1024 GB. The default value is 40GB. - By default, system disks support Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD)High I/O (SAS) and Ultra-high I/O (SSD) EVS disks. + By default, system disks support Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD) EVS disks. **Encryption**: Data disk encryption safeguards your data. Snapshots generated from encrypted disks and disks created using these snapshots automatically inherit the encryption function. **This function is available only in certain regions.** @@ -225,6 +219,10 @@ To create a node pool in a cluster, perform the following steps: - **Subnet IP Address**: Select **Automatically assign IP address** (recommended) or **Manually assigning IP addresses**. + .. note:: + + When you **manually assign IPs**, the master IP is randomly specified. 
Therefore, it may conflict with the worker node IP. If you prefer the manual operation, you are advised to select a subnet CIDR block different from that of the master node when setting worker node :ref:`subnet `. + #. **Advanced Kubernetes Settings** (optional): Click |image5| to show advanced Kubernetes settings. - **Max Pods**: maximum number of pods that can be created on a node, including the system's default pods. If the cluster uses the **VPC network model**, the maximum value is determined by the number of IP addresses that can be allocated to containers on each node. @@ -272,13 +270,7 @@ Procedure - for CCE Turbo Clusters For details about common containers and secure containers, see :ref:`Secure Containers and Common Containers `. - - **Specifications**: Select node specifications that best fit your business needs. - - - **General-purpose**: provides a balance of computing, memory, and network resources. It is a good choice for many applications, such as web servers, workload development, workload testing, and small-scale databases. - - **Memory-optimized**: provides higher memory capacity than general-purpose nodes and is suitable for relational databases, NoSQL, and other workloads that are both memory-intensive and data-intensive. - - **GPU-accelerated**: provides powerful floating-point computing and is suitable for real-time, highly concurrent massive computing. Graphical processing units (GPUs) of P series are suitable for deep learning, scientific computing, and CAE. GPUs of G series are suitable for 3D animation rendering and CAD. **GPU-accelerated nodes can be created only in clusters of v1.11 or later**. GPU-accelerated nodes are available only in certain regions. - - **General computing-plus**: provides stable performance and exclusive resources to enterprise-class workloads with high and stable computing performance. - - **Disk-intensive**: supports :ref:`local disk storage ` and provides high network performance. It is designed for workloads requiring high throughput and data switching, such as big data workloads. + - **Specifications**: Select the node specifications based on service requirements. The available node specifications vary depending on AZs. To ensure node stability, CCE automatically reserves some resources to run necessary system components. For details, see :ref:`Formula for Calculating the Reserved Resources of a Node `. @@ -305,7 +297,7 @@ Procedure - for CCE Turbo Clusters - **System Disk**: Set the system disk space of the worker node. The value ranges from 40GB to 1024 GB. The default value is 50 GB. - By default, system disks support Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD)High I/O (SAS) and Ultra-high I/O (SSD) EVS disks. + By default, system disks support Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD) EVS disks. - **Data Disk**: Set the data disk space of the worker node. The value ranges from 100 GB to 32,768 GB. The default value is 100 GB. The data disk space size is determined by your service requirements. For details, see :ref:`Data Disk Space Allocation `. diff --git a/umn/source/node_pools/node_pool_overview.rst b/umn/source/node_pools/node_pool_overview.rst index 3b57594..1c02723 100644 --- a/umn/source/node_pools/node_pool_overview.rst +++ b/umn/source/node_pools/node_pool_overview.rst @@ -40,6 +40,8 @@ CCE provides the following extended attributes for node pools: - Node pool OS - Maximum number of pods on each node in a node pool +.. 
_cce_01_0081__section16928123042115: + Description of DefaultPool -------------------------- diff --git a/umn/source/nodes/creating_a_node.rst b/umn/source/nodes/creating_a_node.rst index 6f4d620..d2c5bdb 100644 --- a/umn/source/nodes/creating_a_node.rst +++ b/umn/source/nodes/creating_a_node.rst @@ -48,13 +48,7 @@ Procedure - **Node Name**: Enter a node name. A node name contains 1 to 56 characters starting with a lowercase letter and not ending with a hyphen (-). Only lowercase letters, digits, and hyphens (-) are allowed. - - **Specifications**: Select node specifications that best fit your business needs. - - - **General-purpose**: provides a balance of computing, memory, and network resources. It is a good choice for many applications, such as web servers, workload development, workload testing, and small-scale databases. - - **Memory-optimized**: provides higher memory capacity than general-purpose nodes and is suitable for relational databases, NoSQL, and other workloads that are both memory-intensive and data-intensive. - - **GPU-accelerated**: provides powerful floating-point computing and is suitable for real-time, highly concurrent massive computing. Graphical processing units (GPUs) of P series are suitable for deep learning, scientific computing, and CAE. GPUs of G series are suitable for 3D animation rendering and CAD. **GPU-accelerated nodes can be created only in clusters of v1.11 or later**. GPU-accelerated nodes are available only in certain regions. - - **General computing-plus**: provides stable performance and exclusive resources to enterprise-class workloads with high and stable computing performance. - - **Disk-intensive**: supports :ref:`local disk storage ` and provides high network performance. It is designed for workloads requiring high throughput and data switching, such as big data workloads. + - **Specifications**: Select the node specifications based on service requirements. The available node specifications vary depending on AZs. To ensure node stability, CCE automatically reserves some resources to run necessary system components. For details, see :ref:`Formula for Calculating the Reserved Resources of a Node `. @@ -64,7 +58,7 @@ Procedure - **System Disk**: Set the system disk space of the worker node. The value ranges from 40GB to 1024 GB. The default value is 40GB. - By default, system disks support Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD)High I/O (SAS) and Ultra-high I/O (SSD) EVS disks. + By default, system disks support Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD) EVS disks. **Encryption**: Data disk encryption safeguards your data. Snapshots generated from encrypted disks and disks created using these snapshots automatically inherit the encryption function. **This function is available only in certain regions.** @@ -189,6 +183,10 @@ Procedure - **Subnet IP Address**: Select **Automatically assign IP address** (recommended) or **Manually assigning IP addresses**. + .. note:: + + When you **manually assign IPs**, the master IP is randomly specified. Therefore, it may conflict with the worker node IP. If you prefer the manual operation, you are advised to select a subnet CIDR block different from that of the master node when setting worker node :ref:`subnet `. + #. **Advanced Kubernetes Settings**: (Optional) Click |image4| to show advanced cluster settings. - **Max Pods**: maximum number of pods that can be created on a node, including the system's default pods. 
If the cluster uses the **VPC network model**, the maximum value is determined by the number of IP addresses that can be allocated to containers on each node. diff --git a/umn/source/nodes/creating_a_node_in_a_cce_turbo_cluster.rst b/umn/source/nodes/creating_a_node_in_a_cce_turbo_cluster.rst index 8c28601..bb6a229 100644 --- a/umn/source/nodes/creating_a_node_in_a_cce_turbo_cluster.rst +++ b/umn/source/nodes/creating_a_node_in_a_cce_turbo_cluster.rst @@ -43,7 +43,7 @@ After a CCE Turbo cluster is created, you can create nodes for the cluster. | Container runtime | Container runtime used on the node. Different container runtimes support different node specifications and cannot be changed after the node is created. | | | | | | - **runc**: The runC runtime is used. By default, Docker is selected as the container engine when you create a container on the console. | - | | - kata: The Kata runtime is used. If you select this type for both nodes and workloads, the workloads run only on the nodes that use the Kata runtime. containerd is used by default. | + | | - **kata**: The Kata runtime is used. If you select this type for both nodes and workloads, the workloads run only on the nodes that use the Kata runtime. containerd is used by default. | | | | | | For details about common containers and secure containers, see :ref:`Secure Containers and Common Containers `. | +-----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/umn/source/permissions_management/cluster_permissions_iam-based.rst b/umn/source/permissions_management/cluster_permissions_iam-based.rst index a10763c..f77b23d 100644 --- a/umn/source/permissions_management/cluster_permissions_iam-based.rst +++ b/umn/source/permissions_management/cluster_permissions_iam-based.rst @@ -34,7 +34,7 @@ Process Flow Create a user group and assign permissions to it. - Create a user group on the IAM console, and assign CCE permissions, for example, the CCE Viewer policy to the group. + Create a user group on the IAM console, and assign CCE permissions, for example, the **CCE ReadOnlyAccess** policy to the group. .. note:: @@ -48,8 +48,27 @@ Process Flow Log in to the management console as the user you created, and verify that the user has the assigned permissions. - - Log in to the management console and switch to the CCE console. Click **Create** **Cluster** in the upper right corner. If you fail to do so (assuming that only the CCE Viewer role is assigned), the permission control policy takes effect. - - Switch to the console of any other service. If a message appears indicating that you do not have the required permissions to access the service, the CCE Viewer policy takes effect. + - Log in to the management console and switch to the CCE console. Click **Create** **Cluster** in the upper right corner. If you fail to do so (assuming that only the CCE ReadOnlyAccess role is assigned), the permission policy takes effect. + - Switch to the console of any other service. If a message appears indicating that you do not have the required permissions to access the service, the **CCE ReadOnlyAccess** policy takes effect. + +System-defined Roles +-------------------- + +Roles are a type of coarse-grained authorization mechanism that defines service-level permissions based on user responsibilities. 
This mechanism provides only a limited number of service-level roles for authorization. However, roles are not an ideal choice for fine-grained authorization and secure access control. + +The preset system role for CCE in IAM is **CCE Administrator**. When assigning this role to a user group, you must also assign other roles and policies on which this role depends, such as **Tenant Guest**, **Server Administrator**, and **ELB Administrator**. + +System-defined Policies +----------------------- + +The system policies preset for CCE in IAM are **CCE FullAccess** and **CCE ReadOnlyAccess**. + +- **CCE FullAccess**: common operation permissions on CCE cluster resources, excluding the namespace-level permissions for the clusters (with Kubernetes RBAC enabled) and the privileged administrator operations, such as agency configuration and cluster certificate generation +- **CCE ReadOnlyAccess**: permissions to view CCE cluster resources, excluding the namespace-level permissions of the clusters (with Kubernetes RBAC enabled) + +.. note:: + + The **CCE Admin** and **CCE Viewer** roles will be discarded soon. You are advised to use **CCE FullAccess** and **CCE ReadOnlyAccess**. .. _cce_01_0188__section1437818291149: diff --git a/umn/source/permissions_management/namespace_permissions_kubernetes_rbac-based.rst b/umn/source/permissions_management/namespace_permissions_kubernetes_rbac-based.rst index 3e33bb8..f301560 100644 --- a/umn/source/permissions_management/namespace_permissions_kubernetes_rbac-based.rst +++ b/umn/source/permissions_management/namespace_permissions_kubernetes_rbac-based.rst @@ -42,27 +42,27 @@ Users with different cluster permissions (assigned using IAM) have different nam .. table:: **Table 1** Differences in namespace permissions - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - | User | Clusters Earlier Than v1.11.7-r2 | Clusters of v1.11.7-r2 | - +================================================+==================================+===================================================================================================================================+ - | User with the Tenant Administrator permissions | All namespace permissions | - Has all namespace permissions when using CCE on the console. | - | | | - Requires Kubernetes RBAC authorization when using CCE via :ref:`kubectl `. | - | | | | - | | | .. note:: | - | | | | - | | | When such a user accesses the CCE console, an administrator group is added. Therefore, the user has all namespace permissions. | - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - | IAM user with the CCE Administrator role | All namespace permissions | - Has all namespace permissions when using CCE on the console. | - | | | - Requires Kubernetes RBAC authorization when using CCE via :ref:`kubectl `. | - | | | | - | | | .. note:: | - | | | | - | | | When such a user accesses the CCE console, an administrator group is added. Therefore, the user has all namespace permissions. 
| - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - | IAM user with the CCE Viewer role | All namespace permissions | Requires Kubernetes RBAC authorization. | - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - | IAM user with the Tenant Guest role | All namespace permissions | Requires Kubernetes RBAC authorization. | - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + | User | Clusters Earlier Than v1.11.7-r2 | Clusters of v1.11.7-r2 | + +=============================================================+==================================+===================================================================================================================================+ + | User with the Tenant Administrator permissions | All namespace permissions | - Has all namespace permissions when using CCE on the console. | + | | | - Requires Kubernetes RBAC authorization when using CCE via :ref:`kubectl `. | + | | | | + | | | .. note:: | + | | | | + | | | When such a user accesses the CCE console, an administrator group is added. Therefore, the user has all namespace permissions. | + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + | IAM user with the CCE Administrator role | All namespace permissions | - Has all namespace permissions when using CCE on the console. | + | | | - Requires Kubernetes RBAC authorization when using CCE via :ref:`kubectl `. | + | | | | + | | | .. note:: | + | | | | + | | | When such a user accesses the CCE console, an administrator group is added. Therefore, the user has all namespace permissions. | + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + | IAM user with the CCE FullAccess or CCE ReadOnlyAccess role | All namespace permissions | Requires Kubernetes RBAC authorization. | + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + | IAM user with the Tenant Guest role | All namespace permissions | Requires Kubernetes RBAC authorization. 
| + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ Prerequisites ------------- diff --git a/umn/source/permissions_management/permissions_overview.rst b/umn/source/permissions_management/permissions_overview.rst index 8053ca5..5af2529 100644 --- a/umn/source/permissions_management/permissions_overview.rst +++ b/umn/source/permissions_management/permissions_overview.rst @@ -47,27 +47,27 @@ Users with different cluster permissions (assigned using IAM) have different nam .. table:: **Table 1** Differences in namespace permissions - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - | User | Clusters Earlier Than v1.11.7-r2 | Clusters of v1.11.7-r2 | - +================================================+==================================+===================================================================================================================================+ - | User with the Tenant Administrator permissions | All namespace permissions | - Has all namespace permissions when using CCE on the console. | - | | | - Requires Kubernetes RBAC authorization when using CCE via :ref:`kubectl `. | - | | | | - | | | .. note:: | - | | | | - | | | When such a user accesses the CCE console, an administrator group is added. Therefore, the user has all namespace permissions. | - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - | IAM user with the CCE Administrator role | All namespace permissions | - Has all namespace permissions when using CCE on the console. | - | | | - Requires Kubernetes RBAC authorization when using CCE via :ref:`kubectl `. | - | | | | - | | | .. note:: | - | | | | - | | | When such a user accesses the CCE console, an administrator group is added. Therefore, the user has all namespace permissions. | - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - | IAM user with the CCE Viewer role | All namespace permissions | Requires Kubernetes RBAC authorization. | - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ - | IAM user with the Tenant Guest role | All namespace permissions | Requires Kubernetes RBAC authorization. 
| - +------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + | User | Clusters Earlier Than v1.11.7-r2 | Clusters of v1.11.7-r2 | + +=============================================================+==================================+===================================================================================================================================+ + | User with the Tenant Administrator permissions | All namespace permissions | - Has all namespace permissions when using CCE on the console. | + | | | - Requires Kubernetes RBAC authorization when using CCE via :ref:`kubectl `. | + | | | | + | | | .. note:: | + | | | | + | | | When such a user accesses the CCE console, an administrator group is added. Therefore, the user has all namespace permissions. | + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + | IAM user with the CCE Administrator role | All namespace permissions | - Has all namespace permissions when using CCE on the console. | + | | | - Requires Kubernetes RBAC authorization when using CCE via :ref:`kubectl `. | + | | | | + | | | .. note:: | + | | | | + | | | When such a user accesses the CCE console, an administrator group is added. Therefore, the user has all namespace permissions. | + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + | IAM user with the CCE FullAccess or CCE ReadOnlyAccess role | All namespace permissions | Requires Kubernetes RBAC authorization. | + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ + | IAM user with the Tenant Guest role | All namespace permissions | Requires Kubernetes RBAC authorization. | + +-------------------------------------------------------------+----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ kubectl Permissions ------------------- diff --git a/umn/source/product_bulletin/index.rst b/umn/source/product_bulletin/index.rst index e7ac65b..beba27b 100644 --- a/umn/source/product_bulletin/index.rst +++ b/umn/source/product_bulletin/index.rst @@ -1,6 +1,6 @@ -:original_name: cce_01_0236.html +:original_name: cce_bulletin_0000.html -.. _cce_01_0236: +.. _cce_bulletin_0000: Product Bulletin ================ @@ -10,6 +10,7 @@ Product Bulletin - :ref:`CCE Cluster Version Release Notes ` - :ref:`OS Patch Notes for Cluster Nodes ` - :ref:`Security Vulnerability Responses ` +- :ref:`Service Account Token Security Improvement ` .. 
toctree:: :maxdepth: 1 @@ -20,3 +21,4 @@ Product Bulletin cce_cluster_version_release_notes os_patch_notes_for_cluster_nodes security_vulnerability_responses/index + service_account_token_security_improvement diff --git a/umn/source/product_bulletin/os_patch_notes_for_cluster_nodes.rst b/umn/source/product_bulletin/os_patch_notes_for_cluster_nodes.rst index 14152ca..ad61a56 100644 --- a/umn/source/product_bulletin/os_patch_notes_for_cluster_nodes.rst +++ b/umn/source/product_bulletin/os_patch_notes_for_cluster_nodes.rst @@ -16,9 +16,9 @@ CCE nodes in Hybrid clusters can run on EulerOS 2.2, EulerOS 2.5, EulerOS 2.9 an OS Patch ========================= ========================================= EulerOS release 2.0 (SP2) 3.10.0-327.62.59.83.h128.x86_64 - EulerOS release 2.0 (SP5) 3.10.0-862.14.1.5.h591.eulerosv2r7.x86_64 - EulerOS release 2.0 (SP9) 4.18.0-147.5.1.6.h541.eulerosv2r9.x86_64 - CentOS Linux release 7.7 3.10.0-1062.18.1.el7.x86_64 + EulerOS release 2.0 (SP5) 3.10.0-862.14.1.5.h687.eulerosv2r7.x86_64 + EulerOS release 2.0 (SP9) 4.18.0-147.5.1.6.h766.eulerosv2r9.x86_64 + CentOS Linux release 7.7 3.10.0-1160.76.1.el7.x86_64 ========================= ========================================= The OS patches and verification results will be updated from time to time. You can update the operating system based on your needs. diff --git a/umn/source/product_bulletin/service_account_token_security_improvement.rst b/umn/source/product_bulletin/service_account_token_security_improvement.rst new file mode 100644 index 0000000..9f6ba3c --- /dev/null +++ b/umn/source/product_bulletin/service_account_token_security_improvement.rst @@ -0,0 +1,41 @@ +:original_name: cce_10_0477.html + +.. _cce_10_0477: + +Service Account Token Security Improvement +========================================== + +In clusters earlier than v1.21, a token is obtained by mounting the secret of the service account to a pod. Tokens obtained this way are permanent. This approach is no longer recommended starting from version 1.21. Service accounts will stop auto creating secrets in clusters from version 1.25. + +In clusters of version 1.21 or later, you can use the `TokenRequest `__ API to obtain the token and use the projected volume to mount the token to the pod. Such tokens are valid for a fixed period (one hour by default). Before expiration, Kubelet refreshes the token to ensure that the pod always uses a valid token. When the mounting pod is deleted, the token automatically becomes invalid. This approach is implemented by the `BoundServiceAccountTokenVolume `__ feature to improve the token security of the service account. Kubernetes clusters of v1.21 and later enables this approach by default. + +For smooth transition, the community extends the token validity period to one year by default. After one year, the token becomes invalid, and clients that do not support certificate reloading cannot access the API server. It is recommended that clients of earlier versions be upgraded as soon as possible. Otherwise, service faults may occur. + +If you use a Kubernetes client of a to-be-outdated version, the certificate reloading may fail. Versions of officially supported Kubernetes client libraries able to reload tokens are as follows: + +- Go: >= v0.15.7 +- Python: >= v12.0.0 +- Java: >= v9.0.0 +- Javascript: >= v0.10.3 +- Ruby: master branch +- Haskell: v0.3.0.0 +- C#: >= 7.0.5 + +For details, visit https://github.com/kubernetes/enhancements/tree/master/keps/sig-auth/1205-bound-service-account-tokens. + +.. 
note:: + + If you need a token that never expires, you can also `manually manage secrets for service accounts `__. Although a permanent service account token can be manually created, you are advised to use a short-lived token by calling the `TokenRequest `__ API for higher security. + +Diagnosis +--------- + +Run the following steps to check your CCE clusters of v1.21 and later: + +#. Use kubectl to connect to the cluster and run the **kubectl get --raw "/metrics" \| grep stale** command to query the metrics. Check the metric named **serviceaccount_stale_tokens_total**. + + If the value is greater than 0, some workloads in the cluster may be using an earlier client-go version. In this case, check whether this problem occurs in your deployed applications. If yes, upgrade client-go to the version specified by the community as soon as possible. The version must be at least two major versions of the CCE cluster. For example, if your cluster version is 1.23, the Kubernetes dependency library version must be at least 1.19. + + |image1| + +.. |image1| image:: /_static/images/en-us_image_0000001402494682.png diff --git a/umn/source/reference/index.rst b/umn/source/reference/index.rst index 54ce71a..8687eb5 100644 --- a/umn/source/reference/index.rst +++ b/umn/source/reference/index.rst @@ -5,7 +5,6 @@ Reference ========= -- :ref:`Checklist for Migrating Containerized Applications to the Cloud ` - :ref:`How Do I Troubleshoot Insufficient EIPs When a Node Is Added? ` - :ref:`How Do I Format a Data Disk Using Command Line Injection? ` - :ref:`How Do I Use heapster in Clusters of v1.13.10? ` @@ -16,17 +15,13 @@ Reference - :ref:`How Do I Add a Second Data Disk to a Node in a CCE Cluster? ` - :ref:`Workload Abnormalities ` - :ref:`What Should I Do If a Service Released in a Workload Cannot Be Accessed from Public Networks? ` -- :ref:`Selecting a Network Model When Creating a Cluster on CCE ` -- :ref:`Planning CIDR Blocks for a CCE Cluster ` - :ref:`What Is the Relationship Between Clusters, VPCs, and Subnets? ` -- :ref:`How Do I Change the Storage Class Used by a Cluster of v1.15 from FlexVolume to CSI Everest? ` - :ref:`How Do I Harden the VPC Security Group Rules for CCE Cluster Nodes? ` .. toctree:: :maxdepth: 1 :hidden: - checklist_for_migrating_containerized_applications_to_the_cloud how_do_i_troubleshoot_insufficient_eips_when_a_node_is_added how_do_i_format_a_data_disk_using_command_line_injection how_do_i_use_heapster_in_clusters_of_v1.13.10 @@ -37,8 +32,5 @@ Reference how_do_i_add_a_second_data_disk_to_a_node_in_a_cce_cluster workload_abnormalities/index what_should_i_do_if_a_service_released_in_a_workload_cannot_be_accessed_from_public_networks - selecting_a_network_model_when_creating_a_cluster_on_cce - planning_cidr_blocks_for_a_cce_cluster what_is_the_relationship_between_clusters_vpcs_and_subnets - how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest how_do_i_harden_the_vpc_security_group_rules_for_cce_cluster_nodes diff --git a/umn/source/reference/planning_cidr_blocks_for_a_cce_cluster.rst b/umn/source/reference/planning_cidr_blocks_for_a_cce_cluster.rst deleted file mode 100644 index 157fcbd..0000000 --- a/umn/source/reference/planning_cidr_blocks_for_a_cce_cluster.rst +++ /dev/null @@ -1,143 +0,0 @@ -:original_name: cce_bestpractice_00004.html - -.. 
_cce_bestpractice_00004: - -Planning CIDR Blocks for a CCE Cluster -====================================== - -Before creating a cluster on CCE, determine the number of VPCs, number of subnets, container CIDR blocks, and Services for access based on service requirements. - -This section describes the functions of various addresses in a CCE cluster in a VPC and how to plan CIDR blocks. - -Basic Concepts --------------- - -**VPC CIDR Block** - -Virtual Private Cloud (VPC) enables you to provision logically isolated, configurable, and manageable virtual networks for cloud servers, cloud containers, and cloud databases. You have complete control over your virtual network, including selecting your own CIDR block, creating subnets, and configuring security groups. You can also assign EIPs and allocate bandwidth in your VPC for secure and easy access to your business system. - -**Subnet CIDR Block** - -A subnet is a network that manages ECS network planes. It supports IP address management and DNS. The IP addresses of all ECSs in a subnet belong to the subnet. - - -.. figure:: /_static/images/en-us_image_0000001223152421.png - :alt: **Figure 1** VPC CIDR block architecture - - **Figure 1** VPC CIDR block architecture - -By default, ECSs in all subnets of the same VPC can communicate with one another, while ECSs in different VPCs cannot communicate with each other. - -You can create VPC peering connections to enable ECSs in different VPCs to communicate with one another. - -**Container (Pod) CIDR Block** - -Pod is a Kubernetes object. Each pod has an IP address. - -When creating a cluster on CCE, you can specify the pod (container) CIDR block, which cannot overlap with the subnet CIDR block. For example, if the subnet CIDR block is 192.168.0.0/16, the container CIDR block cannot be 192.168.0.0/18 or 192.168.1.0/18, because these addresses are included in 192.168.0.0/16. - -**Service CIDR Block** - -Service is also a Kubernetes object. Each Service has an address. When creating a cluster on CCE, you can specify the Service CIDR block. Similarly, the Service CIDR block cannot overlap with the subnet CIDR block or the container CIDR block. The Service CIDR block can be used only within a cluster. - -For details about the relationship between these CIDR blocks, see :ref:`Figure 2 `. - -How Do I Select a CIDR Block? ------------------------------ - -**Single-VPC Single-Cluster Scenarios** - -These are the simplest scenarios. The VPC CIDR block is determined when the VPC is created. When creating a CCE cluster, select a CIDR block different from that of the current VPC. - -.. _cce_bestpractice_00004__en-us_topic_0261817695_en-us_topic_0099587154_fig15791152874920: - -.. figure:: /_static/images/en-us_image_0000001223152417.png - :alt: **Figure 2** CIDR block in the single-VPC single-cluster scenario - - **Figure 2** CIDR block in the single-VPC single-cluster scenario - -**Single-VPC Multi-Cluster Scenarios** - -Multiple CCE clusters are created in a VPC. - -In the **VPC network** mode, pod packets are forwarded through VPC routes. CCE automatically configures a routing table on the VPC routes to each container CIDR block. - -Pay attention to the following: - -- The VPC address is determined during VPC creation. When creating a cluster, select a CIDR block for each cluster that does not overlap with the VPC CIDR block or other container CIDR blocks. -- The container CIDR blocks of all clusters cannot overlap, but the Service CIDR blocks can. In this case, CCE clusters are partially interconnected. 
A pod of a cluster can directly access the pods of another cluster, but cannot access the Services of the cluster. -- The network scale is limited by the VPC route table. - - -.. figure:: /_static/images/en-us_image_0000001178034110.png - :alt: **Figure 3** VPC network - multi-cluster scenario - - **Figure 3** VPC network - multi-cluster scenario - -In the tunnel network model, the container network is an overlay network plane deployed over the VPC network. Though at some cost of performance, the tunnel encapsulation enables higher interoperability and compatibility with advanced features (such as network policy-based isolation), meeting the requirements of most applications. - - -.. figure:: /_static/images/en-us_image_0000001178192670.png - :alt: **Figure 4** Tunnel network - multi-cluster scenario - - **Figure 4** Tunnel network - multi-cluster scenario - -Pay attention to the following: - -- The VPC address is determined during VPC creation. When creating a cluster, select a CIDR block for each cluster that does not overlap with the VPC CIDR block or other container CIDR blocks. -- The container CIDR blocks of all clusters can overlap, so do the Service CIDR blocks. -- It is recommended that ELB be used for the cross-cluster access between containers. - -**VPC Interconnection Scenarios** - -When two VPC networks are interconnected, you can configure the packets to be sent to the peer VPC in the route table. - -In the VPC network model, after creating a peering connection, you need to add routes for the peering connection to enable communication between the two VPCs. - - -.. figure:: /_static/images/en-us_image_0000001223393899.png - :alt: **Figure 5** VPC Network - VPC interconnection scenario - - **Figure 5** VPC Network - VPC interconnection scenario - -To interconnect cluster containers across VPCs, you need to create VPC peering connections. - -Pay attention to the following: - -- The VPC address is determined during VPC creation. When creating a cluster, select a CIDR block for each cluster that does not overlap with the VPC CIDR block or other container CIDR blocks. - -- The container CIDR blocks of all clusters cannot overlap, but the Service CIDR blocks can. - -- Add the peer container CIDR block to the route table of the VPC peering connection. The following is an example: - - - .. figure:: /_static/images/en-us_image_0000001178034114.png - :alt: **Figure 6** Adding the peer container CIDR block to the local route on the VPC console - - **Figure 6** Adding the peer container CIDR block to the local route on the VPC console - -In the tunnel network model, after creating a peering connection, you need to add routes for the peering connection to enable communication between the two VPCs. - - -.. figure:: /_static/images/en-us_image_0000001223473845.png - :alt: **Figure 7** Tunnel network - VPC interconnection scenario - - **Figure 7** Tunnel network - VPC interconnection scenario - -Pay attention to the following: - -- The VPC address is determined during VPC creation. When creating a cluster, select a CIDR block for each cluster that does not overlap with the VPC CIDR block or other container CIDR blocks. - -- The container CIDR blocks of all clusters cannot overlap, but the Service CIDR blocks can. - -- Add the peer subnet CIDR block to the route table of the VPC peering connection. The following is an example: - - - .. 
figure:: /_static/images/en-us_image_0000001178034116.png - :alt: **Figure 8** Adding the subnet CIDR block of the peer cluster node to the local route on the VPC console - - **Figure 8** Adding the subnet CIDR block of the peer cluster node to the local route on the VPC console - -**VPC-IDC Scenarios** - -Similar to the VPC interconnection scenario, some CIDR blocks in the VPC are routed to the IDC. The pod IP addresses of CCE clusters cannot overlap with the addresses within these CIDR blocks. To access the pod IP addresses in the cluster in the IDC, you need to configure the route table to the private line VBR on the IDC. diff --git a/umn/source/reference/selecting_a_network_model_when_creating_a_cluster_on_cce.rst b/umn/source/reference/selecting_a_network_model_when_creating_a_cluster_on_cce.rst deleted file mode 100644 index a7cf537..0000000 --- a/umn/source/reference/selecting_a_network_model_when_creating_a_cluster_on_cce.rst +++ /dev/null @@ -1,66 +0,0 @@ -:original_name: cce_bestpractice_00162.html - -.. _cce_bestpractice_00162: - -Selecting a Network Model When Creating a Cluster on CCE -======================================================== - -CCE uses high-performance container networking add-ons, which support the tunnel network and VPC network models. - -.. caution:: - - After a cluster is created, the network model cannot be changed. Exercise caution when selecting a network model. - -- **Tunnel network**: The container network is an overlay tunnel network on top of a VPC network and uses the VXLAN technology. This network model is applicable when there is no high requirements on performance. VXLAN encapsulates Ethernet packets as UDP packets for tunnel transmission. Though at some cost of performance, the tunnel encapsulation enables higher interoperability and compatibility with advanced features (such as network policy-based isolation), meeting the requirements of most applications. - - - .. figure:: /_static/images/en-us_image_0000001223393893.png - :alt: **Figure 1** Container tunnel network - - **Figure 1** Container tunnel network - -- **VPC network**: The container network uses VPC routing to integrate with the underlying network. This network model is applicable to performance-intensive scenarios. The maximum number of nodes allowed in a cluster depends on the route quota in a VPC network. Each node is assigned a CIDR block of a fixed size. VPC networks are free from tunnel encapsulation overhead and outperform container tunnel networks. In addition, as VPC routing includes routes to node IP addresses and the container CIDR block, container pods in the cluster can be directly accessed from outside the cluster. - - - .. figure:: /_static/images/en-us_image_0000001178034108.png - :alt: **Figure 2** VPC network - - **Figure 2** VPC network - -The following table lists the differences between the network models. - -.. 
table:: **Table 1** Network comparison - - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | Dimension | Tunnel Network | VPC Network | - +==============================+==================================================================================+================================================================================================================+ - | Core component | OVS | IPVlan | - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | Applicable clusters | Hybrid cluster | Hybrid cluster | - | | | | - | | VM cluster | VM cluster | - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | Support for network policies | Yes | No | - | | | | - | (networkpolicy) | | | - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | Support for ENI | No | Yes. The container network is deeply integrated with the VPC network, and ENI is used for pods to communicate. | - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | IP address management | IP addresses can be migrated. | - Each node is allocated with a small subnet. | - | | | - A static route is added on the VPC router with the next hop set to the node IP address. | - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | Network performance | Performance loss due to VXLAN tunnel encapsulation | - No performance loss as no tunnel encapsulation is required; performance comparable to bare metal networks | - | | | - Data forwarded across nodes through the VPC router | - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | Networking scale | A maximum of 2,000 nodes are supported. | Limited by the VPC route table. 
| - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | External dependency | None | Static route table of the VPC router | - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - | Application scenarios | - Common container service scenarios | - Scenarios that have high requirements on network latency and bandwidth | - | | - Scenarios that do not have high requirements on network latency and bandwidth | - Containers can communicate with VMs using a microservice registration framework, such as Dubbo and CSE. | - +------------------------------+----------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ - -.. important:: - - #. The actual cluster scale is limited by the quota of custom routes of the VPC. Therefore, estimate the number of required nodes before creating a VPC. - #. By default, the VPC network model supports direct communication between containers and hosts in the same VPC. If a peering connection policy is configured between the VPC and another VPC, the containers can directly communicate with hosts on the peer VPC. In addition, in hybrid networking scenarios such as Direct Connect and VPN, communication between containers and hosts on the peer end can also be achieved with proper planning. diff --git a/umn/source/storage_csi/deployment_examples/obs_volumes/using_obs_volumes.rst b/umn/source/storage_csi/deployment_examples/obs_volumes/using_obs_volumes.rst index 9b62f3e..ae78a21 100644 --- a/umn/source/storage_csi/deployment_examples/obs_volumes/using_obs_volumes.rst +++ b/umn/source/storage_csi/deployment_examples/obs_volumes/using_obs_volumes.rst @@ -78,7 +78,7 @@ Creating an OBS Volume | | | | | This parameter indicates the storage classes supported by OBS. | | | | - | | - **Standard**\ : applicable to scenarios where a large number of hotspot files or small-sized files need to be accessed frequently (multiple times per month on average) and require fast access response. | + | | - **Standard**: applicable to scenarios where a large number of hotspot files or small-sized files need to be accessed frequently (multiple times per month on average) and require fast access response. | | | - **Infrequent access**: applicable to scenarios where data is not frequently accessed (less than 12 times per year on average) but requires fast access response. | +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Storage Policy | Object storage has the following policies: | diff --git a/umn/source/storage_csi/overview.rst b/umn/source/storage_csi/overview.rst index ee45851..b926bc8 100644 --- a/umn/source/storage_csi/overview.rst +++ b/umn/source/storage_csi/overview.rst @@ -155,6 +155,8 @@ Notice on Using Add-ons - If your cluster is upgraded from v1.13 to v1.15, :ref:`storage-driver ` is replaced by everest (v1.1.6 or later) for container storage. 
The takeover does not affect the original storage functions. - In version 1.2.0 of the everest add-on, **key authentication** is optimized when OBS is used. After the everest add-on is upgraded from a version earlier than 1.2.0, you need to restart all workloads that use OBS in the cluster. Otherwise, workloads may not be able to use OBS. +.. _cce_01_0307__en-us_topic_0000001199181198_section1690993510317: + Differences Between CSI and FlexVolume Plug-ins ----------------------------------------------- diff --git a/umn/source/storage_csi/persistentvolumeclaims_pvcs.rst b/umn/source/storage_csi/persistentvolumeclaims_pvcs.rst index 81bd442..43aafc6 100644 --- a/umn/source/storage_csi/persistentvolumeclaims_pvcs.rst +++ b/umn/source/storage_csi/persistentvolumeclaims_pvcs.rst @@ -269,16 +269,11 @@ The disk type, encryption setting, and disk mode of the created EVS PVC are cons **Using the CCE Console** #. Log in to the CCE console. -#. Go to the cluster details page, choose **Storage** from the navigation pane, and click the **PersistentVolumeClaims (PVCs)** tab. -#. Click **Create PVC** in the upper right corner. In the dialog box displayed, set the PVC parameters. - - - **Creation Mode**: Select **Snapshot**. - - **PVC Name**: name of a PVC. - - **Snapshot**: Select the snapshot to be used. - +#. Go to the cluster details page, choose **Storage** from the navigation pane, and click the **Snapshots and Backups** tab. +#. Locate the snapshot for which you want to create a PVC, click **Create PVC**, and specify the PVC name in the displayed dialog box. #. Click **Create**. -**Using YAML** +**Creating from YAML** .. code-block:: diff --git a/umn/source/storage_csi/storageclass.rst b/umn/source/storage_csi/storageclass.rst index 325250b..36671ef 100644 --- a/umn/source/storage_csi/storageclass.rst +++ b/umn/source/storage_csi/storageclass.rst @@ -122,6 +122,10 @@ For an ultra-high I/O storage class, you can set the class name to **csi-disk-ss - **Delete**: When a PVC is deleted, both the PV and the EVS disk are deleted. - **Retain**: When a PVC is deleted, the PV and underlying storage resources are not deleted. Instead, you must manually delete these resources. After that, the PV resource is in the **Released** state and cannot be bound to the PVC again. +.. note:: + + The reclamation policy set here has no impact on the SFS Turbo storage. Therefore, the yearly/monthly SFS Turbo resources will not be reclaimed when the cluster or PVC is deleted. + If high data security is required, you are advised to select **Retain** to prevent data from being deleted by mistake. After the definition is complete, run the **kubectl create** commands to create storage resources. diff --git a/umn/source/storage_flexvolume/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst b/umn/source/storage_flexvolume/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst index bc3bcb2..b9f4dab 100644 --- a/umn/source/storage_flexvolume/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst +++ b/umn/source/storage_flexvolume/how_do_i_change_the_storage_class_used_by_a_cluster_of_v1.15_from_flexvolume_to_csi_everest.rst @@ -18,7 +18,7 @@ Procedure #. (Optional) Back up data to prevent data loss in case of exceptions. -#. .. _cce_01_0343__en-us_topic_0285037038_li1219802032512: +#. .. 
_cce_01_0343__cce_bestpractice_0107_li1219802032512: Configure a YAML file of the PV in the CSI format according to the PV in the FlexVolume format and associate the PV with the existing storage. @@ -223,9 +223,9 @@ Procedure | storageClassName | Name of the Kubernetes storage class. Set this field to **csi-sfsturbo** for SFS Turbo volumes. | +----------------------------------+-------------------------------------------------------------------------------------------------------------------------+ -#. .. _cce_01_0343__en-us_topic_0285037038_li1710710385418: +#. .. _cce_01_0343__cce_bestpractice_0107_li1710710385418: - Configure a YAML file of the PVC in the CSI format according to the PVC in the FlexVolume format and associate the PVC with the PV created in :ref:`2 `. + Configure a YAML file of the PVC in the CSI format according to the PVC in the FlexVolume format and associate the PVC with the PV created in :ref:`2 `. To be specific, run the following commands to configure the pvc-example.yaml file, which is used to create a PVC. @@ -268,7 +268,7 @@ Procedure +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | failure-domain.beta.kubernetes.io/zone | AZ where the EVS disk is deployed. Use the same value as that of the FlexVolume PVC. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | everest.io/disk-volume-type | Storage class of the EVS disk. The value can be **SAS** or **SSD**. Set this parameter to the same value as that of the PV created in :ref:`2 `. | + | everest.io/disk-volume-type | Storage class of the EVS disk. The value can be **SAS** or **SSD**. Set this parameter to the same value as that of the PV created in :ref:`2 `. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | name | PVC name, which must be unique in the namespace. The value must be unique in the namespace. (If the PVC is dynamically created by a stateful application, the value of this parameter must be the same as the name of the FlexVolume PVC.) | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -276,7 +276,7 @@ Procedure +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storage | Requested capacity of the PVC, which must be the same as the storage size of the existing PV. 
| +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeName | Name of the PV. Set this parameter to the name of the static PV in :ref:`2 `. | + | volumeName | Name of the PV. Set this parameter to the name of the static PV in :ref:`2 `. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storageClassName | Name of the Kubernetes storage class. Set this field to **csi-disk** for EVS disks. | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -316,7 +316,7 @@ Procedure +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storageClassName | Set this field to **csi-nas**. | +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeName | Name of the PV. Set this parameter to the name of the static PV in :ref:`2 `. | + | volumeName | Name of the PV. Set this parameter to the name of the static PV in :ref:`2 `. | +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Configuration example of **a PVC for an OBS volume**: @@ -348,7 +348,7 @@ Procedure +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Parameter | Description | +============================+============================================================================================================================================================================================================================================+ - | everest.io/obs-volume-type | OBS volume type, which can be **STANDARD** (standard bucket) and **WARM** (infrequent access bucket). Set this parameter to the same value as that of the PV created in :ref:`2 `. | + | everest.io/obs-volume-type | OBS volume type, which can be **STANDARD** (standard bucket) and **WARM** (infrequent access bucket). Set this parameter to the same value as that of the PV created in :ref:`2 `. 
| +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | csi.storage.k8s.io/fstype | File type, which can be **obsfs** or **s3fs**. The value must be the same as that of **fsType** of the static OBS volume PV. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -360,7 +360,7 @@ Procedure +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storageClassName | Name of the Kubernetes storage class. Set this field to **csi-obs**. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeName | Name of the PV. Set this parameter to the name of the static PV created in :ref:`2 `. | + | volumeName | Name of the PV. Set this parameter to the name of the static PV created in :ref:`2 `. | +----------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Configuration example of **a PVC for an SFS Turbo volume**: @@ -398,10 +398,10 @@ Procedure +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storage | Storage capacity, in the unit of Gi. The value must be the same as the storage size of the existing PV. | +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | volumeName | Name of the PV. Set this parameter to the name of the static PV created in :ref:`2 `. | + | volumeName | Name of the PV. Set this parameter to the name of the static PV created in :ref:`2 `. | +------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -#. .. _cce_01_0343__en-us_topic_0285037038_li487255772614: +#. .. _cce_01_0343__cce_bestpractice_0107_li487255772614: Upgrade the workload to use a new PVC. @@ -415,7 +415,7 @@ Procedure .. note:: - Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. + Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. b. Go to the CCE console. 
On the workload upgrade page, click **Upgrade** > **Advanced Settings** > **Data Storage** > **Cloud Storage**. @@ -435,7 +435,7 @@ Procedure .. note:: - Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. + Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. b. Run the **kubectl edit** command to edit the StatefulSet and use the newly created PVC. @@ -473,7 +473,7 @@ Procedure .. note:: - Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. + Replace the example file name **pvc-example.yaml** in the preceding commands with the names of the YAML files configured in :ref:`2 ` and :ref:`3 `. e. Change the number of pods back to the original value and wait until the pods are running. @@ -518,7 +518,7 @@ Procedure storage: 10Gi storageClassName: csi-disk - The parameter value must be the same as the PVC of the EVS volume created in :ref:`3 `. + The parameter value must be the same as the PVC of the EVS volume created in :ref:`3 `. Configuration example of **volumeClaimTemplates for an SFS volume**: @@ -537,7 +537,7 @@ Procedure storage: 10Gi storageClassName: csi-nas - The parameter value must be the same as the PVC of the SFS volume created in :ref:`3 `. + The parameter value must be the same as the PVC of the SFS volume created in :ref:`3 `. Configuration example of **volumeClaimTemplates for an OBS volume**: @@ -559,7 +559,7 @@ Procedure storage: 1Gi storageClassName: csi-obs - The parameter value must be the same as the PVC of the OBS volume created in :ref:`3 `. + The parameter value must be the same as the PVC of the OBS volume created in :ref:`3 `. - Delete the StatefulSet. @@ -576,7 +576,7 @@ Procedure .. note:: - If a rollback is required, perform :ref:`4 `. Select the PVC in FlexVolume format and upgrade the application. + If a rollback is required, perform :ref:`4 `. Select the PVC in FlexVolume format and upgrade the application. #. Uninstall the PVC in the FlexVolume format. diff --git a/umn/source/storage_flexvolume/using_evs_disks_as_storage_volumes/kubectl_automatically_creating_an_evs_disk.rst b/umn/source/storage_flexvolume/using_evs_disks_as_storage_volumes/kubectl_automatically_creating_an_evs_disk.rst index a23d4cc..a1ac2ba 100644 --- a/umn/source/storage_flexvolume/using_evs_disks_as_storage_volumes/kubectl_automatically_creating_an_evs_disk.rst +++ b/umn/source/storage_flexvolume/using_evs_disks_as_storage_volumes/kubectl_automatically_creating_an_evs_disk.rst @@ -59,7 +59,7 @@ Procedure +==========================================+======================================================================================================================================+ | volume.beta.kubernetes.io/storage-class | EVS disk type. The value is in lowercase. | | | | - | | Supported values: Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD)High I/O (SAS) and Ultra-high I/O (SSD) | + | | Supported values: Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD) | +------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ | failure-domain.beta.kubernetes.io/region | Region where the cluster is located. 
| | | | diff --git a/umn/source/storage_flexvolume/using_evs_disks_as_storage_volumes/kubectl_creating_a_pv_from_an_existing_evs_disk.rst b/umn/source/storage_flexvolume/using_evs_disks_as_storage_volumes/kubectl_creating_a_pv_from_an_existing_evs_disk.rst index f2dca19..a2e80b1 100644 --- a/umn/source/storage_flexvolume/using_evs_disks_as_storage_volumes/kubectl_creating_a_pv_from_an_existing_evs_disk.rst +++ b/umn/source/storage_flexvolume/using_evs_disks_as_storage_volumes/kubectl_creating_a_pv_from_an_existing_evs_disk.rst @@ -93,7 +93,7 @@ Procedure +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storage | EVS volume capacity in the unit of Gi. | +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | storageClassName | EVS disk type. Supported values: Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD)High I/O (SAS) and Ultra-high I/O (SSD) | + | storageClassName | EVS disk type. Supported values: Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD) | +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | driver | Storage driver. | | | | @@ -209,7 +209,7 @@ Procedure +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storage | EVS volume capacity in the unit of Gi. | +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | storageClassName | EVS disk type. Supported values: Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD)High I/O (SAS) and Ultra-high I/O (SSD) | + | storageClassName | EVS disk type. Supported values: Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD) | +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | driver | Storage driver. 
| | | | @@ -319,7 +319,7 @@ Procedure +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | storage | EVS volume capacity in the unit of Gi. | +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | storageClassName | EVS disk type. Supported values: Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD)High I/O (SAS) and Ultra-high I/O (SSD) | + | storageClassName | EVS disk type. Supported values: Common I/O (SATA), High I/O (SAS), and Ultra-high I/O (SSD) | +------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | driver | Storage driver. | | | | diff --git a/umn/source/storage_flexvolume/using_obs_buckets_as_storage_volumes/using_obs_volumes.rst b/umn/source/storage_flexvolume/using_obs_buckets_as_storage_volumes/using_obs_volumes.rst index debd06d..f44a7f1 100644 --- a/umn/source/storage_flexvolume/using_obs_buckets_as_storage_volumes/using_obs_volumes.rst +++ b/umn/source/storage_flexvolume/using_obs_buckets_as_storage_volumes/using_obs_volumes.rst @@ -79,7 +79,7 @@ Creating an OBS Volume | | | | | This parameter indicates the storage classes supported by OBS. | | | | - | | - **Standard**\ : applicable to scenarios where a large number of hotspot files or small-sized files need to be accessed frequently (multiple times per month on average) and require fast access response. | + | | - **Standard**: applicable to scenarios where a large number of hotspot files or small-sized files need to be accessed frequently (multiple times per month on average) and require fast access response. | | | - **Infrequent access**: applicable to scenarios where data is not frequently accessed (less than 12 times per year on average) but requires fast access response. | +-----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Storage Policy | Object storage has the following policies: | diff --git a/umn/source/workloads/configuring_a_container/dns_configuration.rst b/umn/source/workloads/configuring_a_container/dns_configuration.rst new file mode 100644 index 0000000..e1d9ff9 --- /dev/null +++ b/umn/source/workloads/configuring_a_container/dns_configuration.rst @@ -0,0 +1,292 @@ +:original_name: cce_01_0133_0.html + +.. _cce_01_0133_0: + +DNS Configuration +================= + +Every Kubernetes cluster has a built-in DNS add-on (CoreDNS) to provide domain name resolution for workloads in the cluster. 
When handling a large number of concurrent DNS queries, CoreDNS may encounter a performance bottleneck, that is, it may occasionally fail to fulfill DNS queries. There are cases when Kubernetes workloads initiate unnecessary DNS queries, which adds to the DNS load when queries are highly concurrent. Tuning the DNS configuration for workloads will reduce the risk of DNS query failures to some extent. + +For more information about DNS, see :ref:`coredns (System Resource Add-on, Mandatory) `. + +DNS Configuration Items +----------------------- + +Run the **cat /etc/resolv.conf** command on a Linux node or container to view the DNS resolver configuration file. The following is an example DNS resolver configuration of a container in a Kubernetes cluster: + +.. code-block:: + + nameserver 10.247.x.x + search default.svc.cluster.local svc.cluster.local cluster.local + options ndots:5 + +**Configuration Options** + +- **nameserver**: a list of IP addresses of the name servers that the resolver will query. If this parameter is set to 10.247.x.x, the resolver will query kube-dns/CoreDNS. If this parameter is set to another IP address, the resolver will query a cloud or on-premises DNS server. + +- **search**: a search list for host-name lookup. When a domain name cannot be resolved, DNS queries will be attempted by combining the domain name with each domain in the search list in turn until a match is found or all domains in the search list are tried. For CCE clusters, the search list is currently limited to three domains per container. When a nonexistent domain name is being resolved, eight DNS queries will be initiated because each domain name (including those in the search list) will be queried twice, once for IPv4 and once for IPv6. + +- **options**: options that allow certain internal resolver variables to be modified. Common options include timeout and ndots. + + The value **ndots:5** means that if a domain name has fewer than 5 dots (.), DNS queries will be attempted by combining the domain name with each domain in the search list in turn. If no match is found after all the domains in the search list are tried, the domain name itself is then used for the DNS query. If the domain name has 5 or more dots, it will be tried first for the DNS query. If the domain name cannot be resolved, DNS queries will then be attempted by combining the domain name with each domain in the search list in turn. + + For example, the domain name **www.***.com** has only two dots (fewer than the value of **ndots**), and therefore the sequence of DNS queries is as follows: **www.***.com.default.svc.cluster.local**, **www.***.com.svc.cluster.local**, **www.***.com.cluster.local**, and **www.***.com**. This means that at least seven DNS queries will be initiated before the domain name is resolved into an IP address. It is clear that many unnecessary DNS queries are initiated when an external domain name is accessed, so there is room for improvement in the workload's DNS configuration. + +.. note:: + + For more information about configuration options in the resolver configuration file used by Linux operating systems, visit http://man7.org/linux/man-pages/man5/resolv.conf.5.html. + +Configuring DNS Using the Workload YAML +--------------------------------------- + +When creating a workload using a YAML file, you can configure the DNS settings in the YAML. The following is an example for an Nginx application: + +.. 
code-block:: + + apiVersion: apps/v1 + kind: Deployment + metadata: + name: nginx + namespace: default + spec: + replicas: 1 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: container-1 + image: nginx:latest + imagePullPolicy: IfNotPresent + imagePullSecrets: + - name: default-secret + dnsPolicy: None + dnsConfig: + options: + - name: ndots + value: '5' + - name: timeout + value: '3' + nameservers: + - 10.2.3.4 + searches: + - my.dns.search.suffix + +**dnsPolicy** + +The **dnsPolicy** field is used to configure a DNS policy for an application. The default value is **ClusterFirst**. The DNS parameters in **dnsConfig** will be merged to the DNS file generated according to **dnsPolicy**. The merge rules are later explained in :ref:`Table 2 `. Currently, **dnsPolicy** supports the following four values: + +.. table:: **Table 1** dnsPolicy + + +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Parameter | Description | + +===================================+=======================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================+ + | ClusterFirst (default value) | CCE cluster's CoreDNS, which is cascaded with the cloud DNS by default, is used for workloads. Containers can resolve both the cluster-internal domain names registered by a Service and the external domain names exposed to public networks. The search list (**search** option) and **ndots: 5** are present in the DNS configuration file. Therefore, when accessing an external domain name and a long cluster-internal domain name (for example, kubernetes.default.svc.cluster.local), the search list will usually be traversed first, resulting in at least six invalid DNS queries. The issue of invalid DNS queries disappears only when a short cluster-internal domain name (for example, kubernetes) is being accessed. 
| + +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | ClusterFirstWithHostNet | By default, the DNS configuration file that the **--resolv-conf** flag points to is configured for workloads running with **hostNetwork=true**, that is, a cloud DNS is used for CCE clusters. If workloads need to use Kube-DNS/CoreDNS of the cluster, set **dnsPolicy** to **ClusterFirstWithHostNet** and container's DNS configuration file is the same as ClusterFirst, in which invalid DNS queries still exist. | + | | | + | | .. code-block:: | + | | | + | | ... | + | | spec: | + | | containers: | + | | - image: nginx:latest | + | | imagePullPolicy: IfNotPresent | + | | name: container-1 | + | | restartPolicy: Always | + | | hostNetwork: true | + | | dnsPolicy: ClusterFirstWithHostNet | + +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Default | Container's DNS configuration file is the DNS configuration file that the kubelet's **--resolv-conf** flag points to. In this case, a cloud DNS is used for CCE clusters. Both **search** and **options** fields are left unspecified. This configuration can only resolve the external domain names registered with the Internet, and not cluster-internal domain names. This configuration is free from the issue of invalid DNS queries. 
| + +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | None | If **dnsPolicy** is set to **None**, the **dnsConfig** field must be specified because all DNS settings are supposed to be provided using the **dnsConfig** field. | + +-----------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +.. note:: + + If the **dnsPolicy** field is not specified, the default value is **ClusterFirst** instead of **Default**. + +**dnsConfig** + +The **dnsConfig** field is used to configure DNS parameters for workloads. The configured parameters are merged to the DNS configuration file generated according to **dnsPolicy**. If **dnsPolicy** is set to **None**, the workload's DNS configuration file is specified by the **dnsConfig** field. If **dnsPolicy** is not set to **None**, the DNS parameters configured in **dnsConfig** are added to the DNS configuration file generated according to **dnsPolicy**. + +.. _cce_01_0133_0__cce_01_0365_en-us_topic_0000001199021204_table16581121652515: + +.. table:: **Table 2** dnsConfig + + +-------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | Parameter | Description | + +=============+================================================================================================================================================================================================================================================================================================================================================+ + | options | An optional list of objects where each object may have a name property (required) and a value property (optional). The contents in this property will be merged to the options generated from the specified DNS policy in **dnsPolicy**. Duplicate entries are removed. 
| + +-------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | nameservers | A list of IP addresses that will be used as DNS servers. If workload's **dnsPolicy** is set to **None**, the list must contain at least one IP address, otherwise this property is optional. The servers listed will be combined to the nameservers generated from the specified DNS policy in **dnsPolicy** with duplicate addresses removed. | + +-------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | searches | A list of DNS search domains for hostname lookup in the Pod. This property is optional. When specified, the provided list will be merged into the search domain names generated from the chosen DNS policy in **dnsPolicy**. Duplicate domain names are removed. Kubernetes allows for at most 6 search domains. | + +-------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Configuration Examples +---------------------- + +The following example describes how to configure DNS for workloads. + +- **Use Case 1: Using Kube-DNS/CoreDNS Built in Kubernetes Clusters** + + **Scenario** + + Kubernetes in-cluster Kube-DNS/CoreDNS is applicable to resolving only cluster-internal domain names or cluster-internal domain names + external domain names. This is the default DNS for workloads. + + **Example:** + + .. code-block:: + + apiVersion: v1 + kind: Pod + metadata: + namespace: default + name: dns-example + spec: + containers: + - name: test + image: nginx:alpine + dnsPolicy: ClusterFirst + + Container's DNS configuration file: + + .. code-block:: + + nameserver 10.247.3.10 + search default.svc.cluster.local svc.cluster.local cluster.local + options ndots:5 + +- **Use Case 2: Using a Cloud DNS** + + **Scenario** + + A DNS cannot resolve cluster-internal domain names and therefore is applicable to the scenario where workloads access only external domain names registered with the Internet. + + **Example:** + + .. code-block:: + + apiVersion: v1 + kind: Pod + metadata: + namespace: default + name: dns-example + spec: + containers: + - name: test + image: nginx:alpine + dnsPolicy: Default//The DNS configuration file that the kubelet's --resolv-conf flag points to is used. In this case, a DNS is used for CCE clusters. + + Container's DNS configuration file: + + .. code-block:: + + nameserver 100.125.x.x + +- **Use Case 3: Using Kube-DNS/CoreDNS for Workloads Running with hostNetwork** + + **Scenario** + + By default, a DNS is used for workloads running with hostNetwork. If workloads need to use Kube-DNS/CoreDNS, set **dnsPolicy** to **ClusterFirstWithHostNet**. + + **Example:** + + .. 
code-block:: + + apiVersion: v1 + kind: Pod + metadata: + name: nginx + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: nginx + image: nginx:alpine + ports: + - containerPort: 80 + + Container's DNS configuration file: + + .. code-block:: + + nameserver 10.247.3.10 + search default.svc.cluster.local svc.cluster.local cluster.local + options ndots:5 + +- **Use Case 4: Customizing Application's DNS Configuration** + + **Scenario** + + You can flexibly customize the DNS configuration file for applications. Using **dnsPolicy** and **dnsConfig** together can address almost all scenarios, including the scenarios in which an on-premises DNS will be used, multiple DNSs will be cascaded, and DNS configuration options will be modified. + + **Example 1: Using Your On-Premises DNS** + + *Set* **dnsPolicy** *to* **None** *so application's DNS configuration file is generated based on* **dnsConfig**\ *.* + + .. code-block:: + + apiVersion: v1 + kind: Pod + metadata: + namespace: default + name: dns-example + spec: + containers: + - name: test + image: nginx:alpine + dnsPolicy: "None" + dnsConfig: + nameservers: + - 10.2.3.4 //IP address of your on-premises DNS + searches: + - ns1.svc.cluster.local + - my.dns.search.suffix + options: + - name: ndots + value: "2" + - name: timeout + value: "3" + + Container's DNS configuration file: + + .. code-block:: + + nameserver 10.2.3.4 + search ns1.svc.cluster.local my.dns.search.suffix + options timeout:3 ndots:2 + + **Example 2: Modifying the ndots Option in the DNS Configuration File to Reduce Invalid DNS Queries** + + Set **dnsPolicy** to a value other than **None** so the DNS parameters configured in **dnsConfig** are added to the DNS configuration file generated based on **dnsPolicy**. + + .. code-block:: + + apiVersion: v1 + kind: Pod + metadata: + namespace: default + name: dns-example + spec: + containers: + - name: test + image: nginx:alpine + dnsPolicy: "ClusterFirst" + dnsConfig: + options: + - name: ndots + value: "2" //Changes the ndots:5 option in the DNS configuration file generated based on the ClusterFirst policy to ndots:2. + + Container's DNS configuration file: + + .. code-block:: + + nameserver 10.247.3.10 + search default.svc.cluster.local svc.cluster.local cluster.local + options ndots:2 diff --git a/umn/source/workloads/configuring_a_container/index.rst b/umn/source/workloads/configuring_a_container/index.rst index 34ebd91..5374b69 100644 --- a/umn/source/workloads/configuring_a_container/index.rst +++ b/umn/source/workloads/configuring_a_container/index.rst @@ -11,6 +11,7 @@ Configuring a Container - :ref:`Setting Container Startup Commands ` - :ref:`Setting Health Check for a Container ` - :ref:`Setting an Environment Variable ` +- :ref:`DNS Configuration ` .. toctree:: :maxdepth: 1 @@ -22,3 +23,4 @@ Configuring a Container setting_container_startup_commands setting_health_check_for_a_container setting_an_environment_variable + dns_configuration
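The DNS use cases above configure **dnsConfig** on bare Pods. The sketch below is an illustration rather than part of the official procedure: it applies the same **ndots** tuning from Example 2 at the Deployment level, which is how most workloads are actually deployed. The workload name **web** and the image are placeholders.

.. code-block::

   apiVersion: apps/v1
   kind: Deployment
   metadata:
     name: web                       # hypothetical workload name
     namespace: default
   spec:
     replicas: 2
     selector:
       matchLabels:
         app: web
     template:
       metadata:
         labels:
           app: web
       spec:
         containers:
         - name: container-1
           image: nginx:alpine
         imagePullSecrets:
         - name: default-secret
         dnsPolicy: ClusterFirst      # keep in-cluster resolution through CoreDNS
         dnsConfig:
           options:
           - name: ndots
             value: "2"               # names with at least 2 dots are tried as-is first
           - name: timeout
             value: "3"

As a general resolver behavior, an external domain name written with a trailing dot (for example, **www.example.com.**) is treated as fully qualified and is never combined with the search list, which also avoids the invalid queries described above.

Similarly, the "Creating from YAML" step for creating a PVC from a snapshot does not show its YAML in this change. The following is only a minimal sketch using the standard Kubernetes snapshot **dataSource** fields; the snapshot name **snap-01** and the 10Gi size are assumptions, and, as noted earlier, the disk type, encryption setting, and disk mode of the resulting EVS PVC follow the snapshot source.

.. code-block::

   apiVersion: v1
   kind: PersistentVolumeClaim
   metadata:
     name: pvc-from-snapshot
     namespace: default
   spec:
     dataSource:
       name: snap-01                              # hypothetical VolumeSnapshot name
       kind: VolumeSnapshot
       apiGroup: snapshot.storage.k8s.io
     accessModes:
     - ReadWriteOnce
     resources:
       requests:
         storage: 10Gi                            # assumed size; typically at least the snapshot size
     storageClassName: csi-disk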