From f596b3c45ce43727ef01f840157b80b24121683a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=A0tiller?= Date: Wed, 30 Sep 2020 13:56:22 +0200 Subject: [PATCH] Fix CPU/Memory usage metrics when using prometheus operator. (#632) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cluster CPU and Memory usage cannot be sum by node as it is rendered as single cluster metrics. Without this changes, the cluster graphs (standard and pie) shows data only from first (or latest) nodes. For pod specific metrics, when we use prometheus operator the `container_` metics are doubled. Restric search to get those have image tag. Signed-off-by: Jakub Štiller Co-authored-by: Jakub Štiller --- src/main/prometheus/operator.ts | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/prometheus/operator.ts b/src/main/prometheus/operator.ts index 37efa64b98..3d335ff554 100644 --- a/src/main/prometheus/operator.ts +++ b/src/main/prometheus/operator.ts @@ -37,19 +37,19 @@ export class PrometheusOperator implements PrometheusProvider { memoryUsage: ` sum( node_memory_MemTotal_bytes - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) - ) by (node) + ) `.replace(/_bytes/g, `_bytes * on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"}`), - memoryRequests: `sum(kube_pod_container_resource_requests{node=~"${opts.nodes}", resource="memory"}) by (component)`, - memoryLimits: `sum(kube_pod_container_resource_limits{node=~"${opts.nodes}", resource="memory"}) by (component)`, - memoryCapacity: `sum(kube_node_status_capacity{node=~"${opts.nodes}", resource="memory"}) by (component)`, - cpuUsage: `sum(rate(node_cpu_seconds_total{mode=~"user|system"}[${this.rateAccuracy}])* on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"}) by (node)`, - cpuRequests:`sum(kube_pod_container_resource_requests{node=~"${opts.nodes}", resource="cpu"}) by (component)`, - cpuLimits: `sum(kube_pod_container_resource_limits{node=~"${opts.nodes}", resource="cpu"}) by (component)`, - cpuCapacity: `sum(kube_node_status_capacity{node=~"${opts.nodes}", resource="cpu"}) by (component)`, + memoryRequests: `sum(kube_pod_container_resource_requests{node=~"${opts.nodes}", resource="memory"})`, + memoryLimits: `sum(kube_pod_container_resource_limits{node=~"${opts.nodes}", resource="memory"})`, + memoryCapacity: `sum(kube_node_status_capacity{node=~"${opts.nodes}", resource="memory"})`, + cpuUsage: `sum(rate(node_cpu_seconds_total{mode=~"user|system"}[${this.rateAccuracy}])* on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"})`, + cpuRequests:`sum(kube_pod_container_resource_requests{node=~"${opts.nodes}", resource="cpu"})`, + cpuLimits: `sum(kube_pod_container_resource_limits{node=~"${opts.nodes}", resource="cpu"})`, + cpuCapacity: `sum(kube_node_status_capacity{node=~"${opts.nodes}", resource="cpu"})`, podUsage: `sum(kubelet_running_pod_count{node=~"${opts.nodes}"})`, - podCapacity: `sum(kube_node_status_capacity{node=~"${opts.nodes}", resource="pods"}) by (component)`, - fsSize: `sum(node_filesystem_size_bytes{mountpoint="/"} * on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"}) by (node)`, - fsUsage: `sum(node_filesystem_size_bytes{mountpoint="/"} * on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"} - node_filesystem_avail_bytes{mountpoint="/"} * on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"}) by (node)` + podCapacity: `sum(kube_node_status_capacity{node=~"${opts.nodes}", resource="pods"})`, + fsSize: `sum(node_filesystem_size_bytes{mountpoint="/"} * on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"})`, + fsUsage: `sum(node_filesystem_size_bytes{mountpoint="/"} * on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"} - node_filesystem_avail_bytes{mountpoint="/"} * on (pod,namespace) group_left(node) kube_pod_info{node=~"${opts.nodes}"})` } case 'nodes': return { @@ -62,10 +62,10 @@ export class PrometheusOperator implements PrometheusProvider { } case 'pods': return { - cpuUsage: `sum(rate(container_cpu_usage_seconds_total{container!="POD",container!="",pod=~"${opts.pods}",namespace="${opts.namespace}"}[${this.rateAccuracy}])) by (${opts.selector})`, + cpuUsage: `sum(rate(container_cpu_usage_seconds_total{container!="POD",container!="",image!="",pod=~"${opts.pods}",namespace="${opts.namespace}"}[${this.rateAccuracy}])) by (${opts.selector})`, cpuRequests: `sum(kube_pod_container_resource_requests{pod=~"${opts.pods}",resource="cpu",namespace="${opts.namespace}"}) by (${opts.selector})`, cpuLimits: `sum(kube_pod_container_resource_limits{pod=~"${opts.pods}",resource="cpu",namespace="${opts.namespace}"}) by (${opts.selector})`, - memoryUsage: `sum(container_memory_working_set_bytes{container!="POD",container!="",pod=~"${opts.pods}",namespace="${opts.namespace}"}) by (${opts.selector})`, + memoryUsage: `sum(container_memory_working_set_bytes{container!="POD",container!="",image!="",pod=~"${opts.pods}",namespace="${opts.namespace}"}) by (${opts.selector})`, memoryRequests: `sum(kube_pod_container_resource_requests{pod=~"${opts.pods}",resource="memory",namespace="${opts.namespace}"}) by (${opts.selector})`, memoryLimits: `sum(kube_pod_container_resource_limits{pod=~"${opts.pods}",resource="memory",namespace="${opts.namespace}"}) by (${opts.selector})`, fsUsage: `sum(container_fs_usage_bytes{container!="POD",container!="",pod=~"${opts.pods}",namespace="${opts.namespace}"}) by (${opts.selector})`,