add documents
@ -1,4 +0,0 @@
|
|||||||
sphinx>=2.0.0,!=2.1.0 # BSD
|
|
||||||
otcdocstheme>=1.0.0 # Apache-2.0
|
|
||||||
# releasenotes
|
|
||||||
reno>=3.1.0 # Apache-2.0
|
|
@ -1,75 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
||||||
# implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath('../..'))
|
|
||||||
# -- General configuration ----------------------------------------------------
|
|
||||||
|
|
||||||
# Add any Sphinx extension module names here, as strings. They can be
|
|
||||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
|
||||||
extensions = [
|
|
||||||
'sphinx.ext.autodoc',
|
|
||||||
'otcdocstheme',
|
|
||||||
]
|
|
||||||
|
|
||||||
# autodoc generation is a bit aggressive and a nuisance when doing heavy
|
|
||||||
# text edit cycles.
|
|
||||||
# execute "export SPHINX_DEBUG=1" in your terminal to disable
|
|
||||||
|
|
||||||
# The suffix of source filenames.
|
|
||||||
source_suffix = '.rst'
|
|
||||||
|
|
||||||
# The master toctree document.
|
|
||||||
master_doc = 'index'
|
|
||||||
|
|
||||||
# General information about the project.
|
|
||||||
project = 'internal-documentation'
|
|
||||||
copyright = '2022, Open Telekom Cloud Developers'
|
|
||||||
|
|
||||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
|
||||||
add_function_parentheses = True
|
|
||||||
|
|
||||||
# If true, the current module name will be prepended to all description
|
|
||||||
# unit titles (such as .. function::).
|
|
||||||
add_module_names = True
|
|
||||||
|
|
||||||
# The name of the Pygments (syntax highlighting) style to use.
|
|
||||||
pygments_style = 'native'
|
|
||||||
|
|
||||||
# -- Options for HTML output --------------------------------------------------
|
|
||||||
|
|
||||||
# The theme to use for HTML and HTML Help pages. Major themes that come with
|
|
||||||
# Sphinx are currently 'default' and 'sphinxdoc'.
|
|
||||||
# html_theme_path = ["."]
|
|
||||||
# html_theme = '_theme'
|
|
||||||
# html_static_path = ['static']
|
|
||||||
html_theme = 'otcdocs'
|
|
||||||
|
|
||||||
# Output file base name for HTML help builder.
|
|
||||||
htmlhelp_basename = '%sdoc' % project
|
|
||||||
|
|
||||||
# Grouping the document tree into LaTeX files. List of tuples
|
|
||||||
# (source start file, target name, title, author, documentclass
|
|
||||||
# [howto/manual]).
|
|
||||||
latex_documents = [
|
|
||||||
('index',
|
|
||||||
'%s.tex' % project,
|
|
||||||
'%s Documentation' % project,
|
|
||||||
'Open Telekom Cloud Developers', 'manual'),
|
|
||||||
]
|
|
||||||
|
|
||||||
# Example configuration for intersphinx: refer to the Python standard library.
|
|
||||||
#intersphinx_mapping = {'http://docs.python.org/': None}
|
|
@ -1,3 +0,0 @@
|
|||||||
======================================================
|
|
||||||
Welcome to the documentation of internal-documentation
|
|
||||||
======================================================
|
|
@ -1,4 +0,0 @@
|
|||||||
sphinx>=2.0.0,!=2.1.0 # BSD
|
|
||||||
otcdocstheme>=1.0.0 # Apache-2.0
|
|
||||||
# releasenotes
|
|
||||||
reno>=3.1.0 # Apache-2.0
|
|
@ -1,75 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
||||||
# implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath('../..'))
|
|
||||||
# -- General configuration ----------------------------------------------------
|
|
||||||
|
|
||||||
# Add any Sphinx extension module names here, as strings. They can be
|
|
||||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
|
||||||
extensions = [
|
|
||||||
'sphinx.ext.autodoc',
|
|
||||||
'otcdocstheme',
|
|
||||||
]
|
|
||||||
|
|
||||||
# autodoc generation is a bit aggressive and a nuisance when doing heavy
|
|
||||||
# text edit cycles.
|
|
||||||
# execute "export SPHINX_DEBUG=1" in your terminal to disable
|
|
||||||
|
|
||||||
# The suffix of source filenames.
|
|
||||||
source_suffix = '.rst'
|
|
||||||
|
|
||||||
# The master toctree document.
|
|
||||||
master_doc = 'index'
|
|
||||||
|
|
||||||
# General information about the project.
|
|
||||||
project = 'internal-documentation'
|
|
||||||
copyright = '2022, Open Telekom Cloud Developers'
|
|
||||||
|
|
||||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
|
||||||
add_function_parentheses = True
|
|
||||||
|
|
||||||
# If true, the current module name will be prepended to all description
|
|
||||||
# unit titles (such as .. function::).
|
|
||||||
add_module_names = True
|
|
||||||
|
|
||||||
# The name of the Pygments (syntax highlighting) style to use.
|
|
||||||
pygments_style = 'native'
|
|
||||||
|
|
||||||
# -- Options for HTML output --------------------------------------------------
|
|
||||||
|
|
||||||
# The theme to use for HTML and HTML Help pages. Major themes that come with
|
|
||||||
# Sphinx are currently 'default' and 'sphinxdoc'.
|
|
||||||
# html_theme_path = ["."]
|
|
||||||
# html_theme = '_theme'
|
|
||||||
# html_static_path = ['static']
|
|
||||||
html_theme = 'otcdocs'
|
|
||||||
|
|
||||||
# Output file base name for HTML help builder.
|
|
||||||
htmlhelp_basename = '%sdoc' % project
|
|
||||||
|
|
||||||
# Grouping the document tree into LaTeX files. List of tuples
|
|
||||||
# (source start file, target name, title, author, documentclass
|
|
||||||
# [howto/manual]).
|
|
||||||
latex_documents = [
|
|
||||||
('index',
|
|
||||||
'%s.tex' % project,
|
|
||||||
'%s Documentation' % project,
|
|
||||||
'Open Telekom Cloud Developers', 'manual'),
|
|
||||||
]
|
|
||||||
|
|
||||||
# Example configuration for intersphinx: refer to the Python standard library.
|
|
||||||
#intersphinx_mapping = {'http://docs.python.org/': None}
|
|
@ -1,3 +0,0 @@
|
|||||||
======================================================
|
|
||||||
Welcome to the documentation of internal-documentation
|
|
||||||
======================================================
|
|
BIN
doc/source/_static/images/added_new_text.png
Executable file
After Width: | Height: | Size: 32 KiB |
BIN
doc/source/_static/images/api_calling_process_flow.png
Normal file
After Width: | Height: | Size: 30 KiB |
BIN
doc/source/_static/images/compare_commits.png
Executable file
After Width: | Height: | Size: 72 KiB |
BIN
doc/source/_static/images/compare_images.png
Executable file
After Width: | Height: | Size: 50 KiB |
BIN
doc/source/_static/images/compare_text.png
Executable file
After Width: | Height: | Size: 55 KiB |
BIN
doc/source/_static/images/helpcenter_gitops.png
Executable file
After Width: | Height: | Size: 186 KiB |
BIN
doc/source/_static/images/jira_document_pr_link.png
Executable file
After Width: | Height: | Size: 118 KiB |
BIN
doc/source/_static/images/obtain_x-subject-token.png
Normal file
After Width: | Height: | Size: 51 KiB |
BIN
doc/source/_static/images/otc.png
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
doc/source/_static/images/permissions.png
Normal file
After Width: | Height: | Size: 52 KiB |
BIN
doc/source/_static/images/sample_code_project_structure.png
Normal file
After Width: | Height: | Size: 22 KiB |
BIN
doc/source/_static/images/sample_code_select_file.png
Normal file
After Width: | Height: | Size: 27 KiB |
BIN
doc/source/_static/images/sample_code_select_project.png
Normal file
After Width: | Height: | Size: 24 KiB |
BIN
doc/source/_static/images/sdkdemo_properties.png
Normal file
After Width: | Height: | Size: 76 KiB |
After Width: | Height: | Size: 23 KiB |
BIN
doc/source/_static/images/viewing_domain_id.png
Normal file
After Width: | Height: | Size: 175 KiB |
BIN
doc/source/_static/images/viewing_project_ids.jpg
Normal file
After Width: | Height: | Size: 110 KiB |
@ -1,3 +1,8 @@
|
|||||||
======================================================
|
======================
|
||||||
Welcome to the documentation of internal-documentation
|
Internal Documentation
|
||||||
======================================================
|
======================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
training/index
|
||||||
|
110
doc/source/training/apimon_training/alerts.rst
Normal file
29
doc/source/training/apimon_training/contact.rst
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
Contact - Whom to address for Feedback?
|
||||||
|
=======================================
|
||||||
|
|
||||||
|
In case you have any feedback, proposals or found any issues regarding the
|
||||||
|
ApiMon, EpMon or CloudMon, you can address them in the corresponding GitHub
|
||||||
|
OpenTelekomCloud-Infra repositories or StackMon repositories.
|
||||||
|
|
||||||
|
Issues or feedback regarding the **ApiMon, EpMon, Status Dashboard, Metric
|
||||||
|
processor** as well as new feature requests can be addressed by filing an issue
|
||||||
|
on the **GitHub** repository under
|
||||||
|
https://github.com/opentelekomcloud-infra/system-config/blob/main/inventory/service/group_vars/apimon.yaml (CMO)
|
||||||
|
https://github.com/opentelekomcloud-infra/stackmon-config (FMO)
|
||||||
|
|
||||||
|
If you have found any problems which affect the **ApiMon dashboard design**
|
||||||
|
please open an issue/PR on **GitHub**
|
||||||
|
https://github.com/opentelekomcloud-infra/system-config/tree/main/playbooks/templates/grafana/apimon (CMO)
|
||||||
|
https://github.com/stackmon/apimon-tests (FMO)
|
||||||
|
|
||||||
|
|
||||||
|
If you have found any problems which affect the **ApiMon playbook scenarios**
|
||||||
|
please open an issue/PR on **GitHub**
|
||||||
|
https://github.com/opentelekomcloud-infra/apimon-tests (CMO)
|
||||||
|
https://github.com/stackmon/apimon-tests (FMO).
|
||||||
|
|
||||||
|
If there is another issue/demand/request, try to locate the proper repository in
|
||||||
|
https://github.com/orgs/stackmon/repositories
|
||||||
|
|
||||||
|
For general questions you can write an E-Mail to the `Ecosystems Squad
|
||||||
|
<mailto:dl-pbcotcdeleco@t-systems.com>`_.
|
148
doc/source/training/apimon_training/dashboards.rst
Normal file
141
doc/source/training/apimon_training/databases.rst
Normal file
34
doc/source/training/apimon_training/difference_cmo_fmo.rst
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
.. _difference_apimon_cmo_fmo:
|
||||||
|
|
||||||
|
===================================
|
||||||
|
Difference ApiMon(CMO)/ApiMon(FMO)
|
||||||
|
===================================
|
||||||
|
|
||||||
|
Due to the ongoing transformation of ApiMon and integration to a more robust
|
||||||
|
CloudMon there are two operation modes right now. Therefore it's important to
|
||||||
|
understand what is supported in which mode.
|
||||||
|
|
||||||
|
This page aims to provide navigation links and to explain the changes once the
|
||||||
|
transformation is completed and some of the locations will change.
|
||||||
|
|
||||||
|
The most important differences are described in the table below:
|
||||||
|
|
||||||
|
+---------------------+------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------+
|
||||||
|
| **Differences** | **ApiMon (CMO)** | **ApiMon(FMO)** |
|
||||||
|
+=====================+============================================================================================================+==========================================================================+
|
||||||
|
| Playbook scenarios  | https://github.com/opentelekomcloud-infra/apimon-tests                                                     | https://github.com/stackmon/apimon-tests/tree/main/playbooks             |
|
||||||
|
+---------------------+------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------+
|
||||||
|
| Dashboards setup | https://github.com/opentelekomcloud-infra/system-config/tree/main/playbooks/templates/grafana/apimon | https://github.com/stackmon/apimon-tests/tree/main/dashboards |
|
||||||
|
+---------------------+------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------+
|
||||||
|
| Environment setup | https://github.com/opentelekomcloud-infra/system-config/blob/main/inventory/service/group_vars/apimon.yaml | https://github.com/opentelekomcloud-infra/stackmon-config |
|
||||||
|
+---------------------+------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------+
|
||||||
|
| Implementation mode | standalone app | plugin based |
|
||||||
|
+---------------------+------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------+
|
||||||
|
| Organization | opentelekomcloud-infra | stackmon |
|
||||||
|
+---------------------+------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------+
|
||||||
|
| Dashboards | https://dashboard.tsi-dev.otc-service.com/ | https://dashboard.tsi-dev.otc-service.com/ |
|
||||||
|
| | https://dashboard.tsi-dev.otc-service.com/dashboards/f/UaB8meoZk/apimon | https://dashboard.tsi-dev.otc-service.com/dashboards/f/CloudMon/cloudmon |
|
||||||
|
+---------------------+------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------+
|
||||||
|
| Documentation | https://confluence.tsi-dev.otc-service.com/display/ES/API-Monitoring | https://stackmon.github.io/ |
|
||||||
|
| | | https://stackmon-cloudmon.readthedocs.io/en/latest/index.html |
|
||||||
|
+---------------------+------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------+
|
40
doc/source/training/apimon_training/epmon_checks.rst
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
.. _epmon_overview:
|
||||||
|
|
||||||
|
============================
|
||||||
|
Endpoint Monitoring overview
|
||||||
|
============================
|
||||||
|
|
||||||
|
|
||||||
|
EpMon is a standalone Python-based process targeting every OTC service. It
|
||||||
|
finds services in the service catalogs and sends GET requests to the configured
|
||||||
|
endpoints.
|
||||||
|
|
||||||
|
Performing extensive tests like provisioning a server gives great
|
||||||
|
coverage, but is usually not something that can be performed very often and
|
||||||
|
leaves certain gaps on the timescale of monitoring. In order to cover this gap
|
||||||
|
the EpMon component can send GET requests to the given URLs relying on the
|
||||||
|
API discovery of the OpenStack cloud (perform GET request to /servers or the
|
||||||
|
compute endpoint). Such requests are cheap and can be performed in a loop, i.e.
|
||||||
|
every 5 seconds. Latency of those calls, as well as the return codes, are being
|
||||||
|
captured and sent to the metrics storage.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Currently EpMon configuration is located in system-config:
|
||||||
|
https://github.com/opentelekomcloud-infra/system-config/blob/main/inventory/service/group_vars/apimon.yaml
|
||||||
|
(this will change in the future once CloudMon is in place)
|
||||||
|
|
||||||
|
And defines the query HTTP targets for every single OTC service.
|
||||||
|
|
||||||
|
EpMon dashboard provides general availability status of every service definition
|
||||||
|
from service catalog:
|
||||||
|
|
||||||
|
.. image:: training_images/epmon_status_dashboard.jpg
|
||||||
|
|
||||||
|
Additionally it provides further details for the endpoints like response times,
|
||||||
|
detected error codes or no responses at all.
|
||||||
|
|
||||||
|
.. image:: training_images/epmon_dashboard_details.jpg
|
||||||
|
|
||||||
|
EpMon findings are also reported to Alerta and notifications are sent to Zulip
|
||||||
|
dedicated topic "apimon_endpoint_monitoring".
|
After Width: | Height: | Size: 109 KiB |
After Width: | Height: | Size: 60 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 16 KiB |
@ -0,0 +1,7 @@
|
|||||||
|
============================
|
||||||
|
How Can I Access Dashboard ?
|
||||||
|
============================
|
||||||
|
|
||||||
|
OTC LDAP authentication is supported on
|
||||||
|
https://dashboard.tsi-dev.otc-service.com.
|
||||||
|
|
@ -0,0 +1,80 @@
|
|||||||
|
.. _working_with_logs:
|
||||||
|
|
||||||
|
=============================================
|
||||||
|
How To Read The Logs And Understand The Issue
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
|
||||||
|
Logs are stored on Swift OBS and they expire after ~1 week. The logs can be
|
||||||
|
accessed from multiple locations:
|
||||||
|
|
||||||
|
- Zulip notifications:
|
||||||
|
|
||||||
|
|
||||||
|
.. image:: faq_images/zulip_notification_links.jpg
|
||||||
|
|
||||||
|
|
||||||
|
- Alerts in Alerta
|
||||||
|
|
||||||
|
|
||||||
|
.. image:: faq_images/alerta_alerts_detail.png
|
||||||
|
|
||||||
|
|
||||||
|
- Tables in dashboards
|
||||||
|
|
||||||
|
|
||||||
|
.. image:: faq_images/dashboard_log_links.jpg
|
||||||
|
|
||||||
|
|
||||||
|
The logs contain the whole Ansible playbook output and help to analyze the problem
|
||||||
|
in detail.
|
||||||
|
For example, the following log detail describes the failed scenario for ECS deployment::
|
||||||
|
|
||||||
|
2023-05-17 21:08:09.038955 | TASK [server_create_delete : Try connecting]
|
||||||
|
2023-05-17 21:08:09.485569 | localhost | ERROR
|
||||||
|
2023-05-17 21:08:09.485862 | localhost | {
|
||||||
|
2023-05-17 21:08:09.485922 | localhost | "changed": true,
|
||||||
|
2023-05-17 21:08:09.485950 | localhost | "cmd": [
|
||||||
|
2023-05-17 21:08:09.485984 | localhost | "ssh",
|
||||||
|
2023-05-17 21:08:09.486016 | localhost | "-o",
|
||||||
|
2023-05-17 21:08:09.486052 | localhost | "UserKnownHostsFile=/dev/null",
|
||||||
|
2023-05-17 21:08:09.486076 | localhost | "-o",
|
||||||
|
2023-05-17 21:08:09.486097 | localhost | "StrictHostKeyChecking=no",
|
||||||
|
2023-05-17 21:08:09.486118 | localhost | "linux@80.158.60.117",
|
||||||
|
2023-05-17 21:08:09.486138 | localhost | "-i",
|
||||||
|
2023-05-17 21:08:09.486160 | localhost | "~/.ssh/scenario2a-162b6915911748c5809474be69d2a3b3-kp.pem"
|
||||||
|
2023-05-17 21:08:09.486192 | localhost | ],
|
||||||
|
2023-05-17 21:08:09.486221 | localhost | "delta": "0:00:00.127394",
|
||||||
|
2023-05-17 21:08:09.486242 | localhost | "end": "2023-05-17 21:08:09.454247",
|
||||||
|
2023-05-17 21:08:09.486262 | localhost | "invocation": {
|
||||||
|
2023-05-17 21:08:09.486283 | localhost | "module_args": {
|
||||||
|
2023-05-17 21:08:09.486314 | localhost | "_raw_params": "ssh -o 'UserKnownHostsFile=/dev/null' -o 'StrictHostKeyChecking=no' linux@80.158.60.117 -i ~/.ssh/scenario2a-162b6915911748c5809474be69d2a3b3-kp.pem",
|
||||||
|
2023-05-17 21:08:09.486373 | localhost | "_uses_shell": false,
|
||||||
|
2023-05-17 21:08:09.486397 | localhost | "argv": null,
|
||||||
|
2023-05-17 21:08:09.486428 | localhost | "chdir": null,
|
||||||
|
2023-05-17 21:08:09.486455 | localhost | "creates": null,
|
||||||
|
2023-05-17 21:08:09.486487 | localhost | "executable": null,
|
||||||
|
2023-05-17 21:08:09.486513 | localhost | "removes": null,
|
||||||
|
2023-05-17 21:08:09.486533 | localhost | "stdin": null,
|
||||||
|
2023-05-17 21:08:09.486553 | localhost | "stdin_add_newline": true,
|
||||||
|
2023-05-17 21:08:09.486573 | localhost | "strip_empty_ends": true,
|
||||||
|
2023-05-17 21:08:09.486593 | localhost | "warn": false
|
||||||
|
2023-05-17 21:08:09.486613 | localhost | }
|
||||||
|
2023-05-17 21:08:09.486633 | localhost | },
|
||||||
|
2023-05-17 21:08:09.486657 | localhost | "msg": "non-zero return code",
|
||||||
|
2023-05-17 21:08:09.486689 | localhost | "rc": 255,
|
||||||
|
2023-05-17 21:08:09.486713 | localhost | "start": "2023-05-17 21:08:09.326853",
|
||||||
|
2023-05-17 21:08:09.486734 | localhost | "stderr": "Pseudo-terminal will not be allocated because stdin is not a terminal.\r\nWarning: Permanently added '80.158.60.117' (ED25519) to the list of known hosts.\r\nlinux@80.158.60.117: Permission denied (publickey).",
|
||||||
|
2023-05-17 21:08:09.486755 | localhost | "stderr_lines": [
|
||||||
|
2023-05-17 21:08:09.486776 | localhost | "Pseudo-terminal will not be allocated because stdin is not a terminal.",
|
||||||
|
2023-05-17 21:08:09.486808 | localhost | "Warning: Permanently added '80.158.60.117' (ED25519) to the list of known hosts.",
|
||||||
|
2023-05-17 21:08:09.486834 | localhost | "linux@80.158.60.117: Permission denied (publickey)."
|
||||||
|
2023-05-17 21:08:09.486855 | localhost | ]
|
||||||
|
2023-05-17 21:08:09.486875 | localhost | }
|
||||||
|
|
||||||
|
In this case it seems that the deployed ECS doesn't contain the injected public SSH key,
|
||||||
|
which can point to an issue with cloud-init or the metadata server.
|
||||||
|
|
||||||
|
The playbooks can also be run manually on any OTC tenant and can be used
|
||||||
|
for further investigation and analysis.
|
||||||
|
|
10
doc/source/training/apimon_training/faq/index.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
==========================
|
||||||
|
Frequently Asked Questions
|
||||||
|
==========================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 1
|
||||||
|
|
||||||
|
how_can_i_access_dashboard
|
||||||
|
how_to_read_the_logs_and_understand_the_issue
|
||||||
|
what_are_the_annotations
|
@ -0,0 +1,22 @@
|
|||||||
|
#########################
|
||||||
|
What Are The Annotations?
|
||||||
|
#########################
|
||||||
|
|
||||||
|
Annotations provide a way to mark points on the graph with rich events. When you
|
||||||
|
hover over an annotation you can get event description and event tags. The text
|
||||||
|
field can include links to other systems with more detail.
|
||||||
|
|
||||||
|
.. image:: faq_images/annotations.jpg
|
||||||
|
|
||||||
|
|
||||||
|
In ApiMon Dashboards annotations are used to show the JIRA change issue types
|
||||||
|
which change the transition from SCHEDULED to IN EXECUTION. This helps to
|
||||||
|
identify if some JIRA change has negative impact on platform in real time. The
|
||||||
|
annotations contain several fields which help to correlate the platform behavior
|
||||||
|
with the respective change directly on the dashboard:
|
||||||
|
|
||||||
|
- JIRA Change issue ID
|
||||||
|
- Impacted Availability Zone
|
||||||
|
- Affected Environment
|
||||||
|
- Main component
|
||||||
|
- Summary
|
22
doc/source/training/apimon_training/index.rst
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
===================
|
||||||
|
ApiMon Training
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 1
|
||||||
|
|
||||||
|
introduction
|
||||||
|
workflow
|
||||||
|
monitoring_coverage
|
||||||
|
test_scenarios
|
||||||
|
epmon_checks
|
||||||
|
dashboards
|
||||||
|
metrics
|
||||||
|
databases
|
||||||
|
alerts
|
||||||
|
notifications
|
||||||
|
logs
|
||||||
|
difference_cmo_fmo
|
||||||
|
contact
|
||||||
|
recorded_session
|
||||||
|
faq/index
|
108
doc/source/training/apimon_training/introduction.rst
Normal file
45
doc/source/training/apimon_training/logs.rst
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
.. _logs:
|
||||||
|
|
||||||
|
====
|
||||||
|
Logs
|
||||||
|
====
|
||||||
|
|
||||||
|
|
||||||
|
- Every single job run log is stored on OpenStack Swift object storage.
|
||||||
|
- Each single job log file provides a unique URL which can be accessed to see log
|
||||||
|
details
|
||||||
|
- These URLs are available on all ApiMon levels:
|
||||||
|
|
||||||
|
- In Zulip alarm messages
|
||||||
|
- In Alerta events
|
||||||
|
- In Grafana Dashboards
|
||||||
|
|
||||||
|
- Logs are simple plain text files of the whole playbook output::
|
||||||
|
|
||||||
|
2020-07-12 05:54:04.661170 | TASK [List Servers]
|
||||||
|
2020-07-12 05:54:09.050491 | localhost | ok
|
||||||
|
2020-07-12 05:54:09.067582 | TASK [Create Server in default AZ]
|
||||||
|
2020-07-12 05:54:46.055650 | localhost | MODULE FAILURE:
|
||||||
|
2020-07-12 05:54:46.055873 | localhost | Traceback (most recent call last):
|
||||||
|
2020-07-12 05:54:46.057441 | localhost |
|
||||||
|
2020-07-12 05:54:46.057499 | localhost | During handling of the above exception, another exception occurred:
|
||||||
|
2020-07-12 05:54:46.057535 | localhost |
|
||||||
|
…
|
||||||
|
2020-07-12 05:54:46.063992 | localhost | File "/tmp/ansible_os_server_payload_uz1c7_iw/ansible_os_server_payload.zip/ansible/modules/cloud/openstack/os_server.py", line 500, in _create_server
|
||||||
|
2020-07-12 05:54:46.065152 | localhost | return self._send_request(
|
||||||
|
2020-07-12 05:54:46.065186 | localhost | File "/root/.local/lib/python3.8/site-packages/keystoneauth1/session.py", line 1020, in _send_request
|
||||||
|
2020-07-12 05:54:46.065334 | localhost | raise exceptions.ConnectFailure(msg)
|
||||||
|
2020-07-12 05:54:46.065378 | localhost | keystoneauth1.exceptions.connection.ConnectFailure: Unable to establish connection to https://ims.eu-de.otctest.t-systems.com/v2/images: ('Connection aborted.', OSError(107, 'Transport endpoint is not connected'))
|
||||||
|
2020-07-12 05:54:46.295035 |
|
||||||
|
2020-07-12 05:54:46.295241 | TASK [Delete server]
|
||||||
|
2020-07-12 05:54:48.481374 | localhost | ok
|
||||||
|
2020-07-12 05:54:48.505761 |
|
||||||
|
2020-07-12 05:54:48.505906 | TASK [Delete SecurityGroup]
|
||||||
|
2020-07-12 05:54:50.727174 | localhost | changed
|
||||||
|
2020-07-12 05:54:50.745541 |
|
||||||
|
|
||||||
|
|
||||||
|
For further details on how to work with logs, please refer to the
|
||||||
|
:ref:`How To Read The Logs And Understand The Issue <working_with_logs>` FAQ
|
||||||
|
page.
|
||||||
|
|
57
doc/source/training/apimon_training/metrics.rst
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
.. _metrics_definition:
|
||||||
|
|
||||||
|
=======
|
||||||
|
Metrics
|
||||||
|
=======
|
||||||
|
|
||||||
|
The Ansible playbook scenarios generate metrics in two ways:
|
||||||
|
|
||||||
|
- The Ansible playbook internally invokes method calls to **OpenStack SDK
|
||||||
|
libraries.** They in turn generate metrics about each API call they do. This
|
||||||
|
requires some special configuration in the clouds.yaml file (currently
|
||||||
|
exposing metrics into statsd and InfluxDB is supported). For details refer
|
||||||
|
to the `config
|
||||||
|
documentation <https://docs.openstack.org/openstacksdk/latest/user/guides/stats.html>`_
|
||||||
|
of the OpenStack SDK. The following metrics are captured:
|
||||||
|
|
||||||
|
- response HTTP code
|
||||||
|
- duration of API call
|
||||||
|
- name of API call
|
||||||
|
- method of API call
|
||||||
|
- service type
|
||||||
|
|
||||||
|
- Ansible plugins may **expose additional metrics** (i.e. whether the overall
|
||||||
|
scenario succeeded or not) with the help of `callback
|
||||||
|
plugin <https://github.com/stackmon/apimon/tree/main/apimon/ansible/callback>`_.
|
||||||
|
Since sometimes it is not sufficient to know only the timings of each API
|
||||||
|
call, Ansible callbacks are utilized to report overall execution time and
|
||||||
|
result (whether the scenario succeeded and how long it took). The following
|
||||||
|
metrics are captured:
|
||||||
|
|
||||||
|
- test case
|
||||||
|
- playbook name
|
||||||
|
- environment
|
||||||
|
- action name
|
||||||
|
- result code
|
||||||
|
- result string
|
||||||
|
- service type
|
||||||
|
- state type
|
||||||
|
- total amount of (failed, passed, ignored, skipped tests)
|
||||||
|
|
||||||
|
Custom metrics:
|
||||||
|
|
||||||
|
In some situations more complex metric generation is required which consists of
|
||||||
|
execution of multiple tasks in scenario. For such cases, the tags parameter is
|
||||||
|
used. Once the specific tasks in playbook are tagged with some specific metric
|
||||||
|
name the metrics are calculated as sum of all executed tasks with respective
|
||||||
|
tag. It's useful in cases where the measured metric contains multiple steps to
|
||||||
|
achieve the desired state of service or service resource. For example, boot up of
|
||||||
|
virtual machine from deployment until successful login via SSH.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
tags: ["metric=delete_server"]
|
||||||
|
tags: ["az={{ availability_zone }}", "service=compute", "metric=create_server{{ metric_suffix }}"]
|
||||||
|
|
||||||
|
More details on how to query metrics from databases are described on the :ref:`Metric
|
||||||
|
databases <metric_databases>` page.
|
51
doc/source/training/apimon_training/monitoring_coverage.rst
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
===================
|
||||||
|
Monitoring coverage
|
||||||
|
===================
|
||||||
|
|
||||||
|
Multiple factors define the monitoring coverage to simulate common customer use
|
||||||
|
cases.
|
||||||
|
|
||||||
|
|
||||||
|
Monitored locations
|
||||||
|
###################
|
||||||
|
|
||||||
|
* EU-DE
|
||||||
|
* EU-NL
|
||||||
|
* PREPROD (EU_DE)
|
||||||
|
* EU-CH2 (Swisscloud)
|
||||||
|
|
||||||
|
|
||||||
|
Monitoring sources
|
||||||
|
##################
|
||||||
|
|
||||||
|
* Inside OTC (eu-de, eu-ch2)
|
||||||
|
* Outside OTC (Swisscloud)
|
||||||
|
|
||||||
|
|
||||||
|
Monitored targets
|
||||||
|
#################
|
||||||
|
|
||||||
|
* Endpoints and HTTP query requests
|
||||||
|
|
||||||
|
* all services
|
||||||
|
* multiple GET queries
|
||||||
|
|
||||||
|
* Static Resources
|
||||||
|
|
||||||
|
* specific services
|
||||||
|
* availability of the resource or resource functionality
|
||||||
|
|
||||||
|
* Dynamic resources
|
||||||
|
|
||||||
|
* ansible playbooks
|
||||||
|
* specific services
|
||||||
|
* monitoring of most common use cases in cloud services
|
||||||
|
|
||||||
|
|
||||||
|
Monitoring dashboards
|
||||||
|
#####################
|
||||||
|
|
||||||
|
* KPI dashboards
|
||||||
|
* 24/7 dashboards
|
||||||
|
* Test results dashboards
|
||||||
|
* Specific service dashboards
|
68
doc/source/training/apimon_training/notifications.rst
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
=============
|
||||||
|
Notifications
|
||||||
|
=============
|
||||||
|
|
||||||
|
Zulip, as the official OTC communication channel, supports an API interface for pushing
|
||||||
|
the notifications from ApiMon to various Zulip streams:
|
||||||
|
|
||||||
|
- #Alerts Stream
|
||||||
|
- #Alerts-Hybrid Stream
|
||||||
|
- #Alerts-Preprod Stream
|
||||||
|
|
||||||
|
Every stream contains topics based on the service type (if represented by
|
||||||
|
a standalone Ansible playbook) and a general apimon_endpoint_monitor topic which
|
||||||
|
contains alerts of GET queries towards all services.
|
||||||
|
|
||||||
|
|
||||||
|
.. image:: training_images/zulip_notifications.png
|
||||||
|
|
||||||
|
|
||||||
|
If the error has been acknowledged on Alerta, the new notification message for
|
||||||
|
a repeating error won't get posted again on Zulip.
|
||||||
|
|
||||||
|
Notifications contain further details which help to identify the root cause faster
|
||||||
|
and more effectively.
|
||||||
|
|
||||||
|
Notification parameters
|
||||||
|
#######################
|
||||||
|
|
||||||
|
The ApiMon notification consists of several fields:
|
||||||
|
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| Notification Field | Description |
|
||||||
|
+===========================+========================================================================+
|
||||||
|
| **APIMon Alert link** | Reference to alert in Alerta |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| **Status** | Status of the alert in Alerta |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| **Environment** | Information about affected environment/region |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| **Severity** | Severity of the alarm |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| **Origin** | Information about origin location from where the job has been executed |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| **Service** | Information about affected service and type of monitoring |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| **Resource** | Further details on the particular resource in which the issue happened |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| **Error message Summary** | Short description of error result |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
| **Execution Log link** | Reference to job execution output on Swift object storage |
|
||||||
|
+---------------------------+------------------------------------------------------------------------+
|
||||||
|
|
||||||
|
The EpMon notification consists of several fields:
|
||||||
|
|
||||||
|
+----------------------------+------------------------------------------------------------------+
|
||||||
|
| Notification Field | Description |
|
||||||
|
+============================+==================================================================+
|
||||||
|
| **APIMon Alert link** | Reference to alert in Alerta |
|
||||||
|
+----------------------------+------------------------------------------------------------------+
|
||||||
|
| **Environment** | Information about affected environment/region |
|
||||||
|
+----------------------------+------------------------------------------------------------------+
|
||||||
|
| **Curl command** | Interpreted request in curl format for reproducible applications |
|
||||||
|
+----------------------------+------------------------------------------------------------------+
|
||||||
|
| **Request error response** | Error result of the requested API call |
|
||||||
|
+----------------------------+------------------------------------------------------------------+
|
||||||
|
|
||||||
|
|
||||||
|
|
14
doc/source/training/apimon_training/recorded_session.rst
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
.. _recorded_session:
|
||||||
|
|
||||||
|
================
|
||||||
|
Recorded Session
|
||||||
|
================
|
||||||
|
|
||||||
|
The session from 26.05.2023 has been recorded and the videos are available on OBS.
|
||||||
|
|
||||||
|
`Part 1 <https://apimon-training.obs.eu-de.otc.t-systems.com/API_MON_part_1.mkv>`_
|
||||||
|
|
||||||
|
`Part 2 <https://apimon-training.obs.eu-de.otc.t-systems.com/API_MON_part_2.mkv>`_
|
||||||
|
|
||||||
|
`Part 3 <https://apimon-training.obs.eu-de.otc.t-systems.com/API_MON_part_3.mkv>`_
|
||||||
|
|
199
doc/source/training/apimon_training/test_scenarios.rst
Normal file
After Width: | Height: | Size: 188 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 88 KiB |