# Render the kube-prometheus-stack chart (pinned to version 70.4.1) under the
# release name "prometheus" and save the generated manifests to a local file.
helm template prometheus prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --version 70.4.1 \
  > kube-prometheus-stack-70.4.1.yaml
spec:
  serviceMonitorSelector:
    matchLabels:
      release: "prometheus"
# Write the prometheus-pushgateway chart's default values to a local values file.
helm show values prometheus-community/prometheus-pushgateway \
  > k8s/prometheus/pushgateway.yaml
serviceMonitor:
  enabled: true
  namespace: monitoring
  # ...(중략)
  additionalLabels:
    release: "prometheus"
# Upgrade the "pushgateway" release in the monitoring namespace using the
# values file k8s/prometheus/pushgateway.yaml.
helm upgrade pushgateway prometheus-community/prometheus-pushgateway \
  --namespace monitoring \
  -f k8s/prometheus/pushgateway.yaml
jungahn@userui-MacBookPro-66 ~ % kubectl get servicemonitors -n monitoring
NAME AGE
prometheus-grafana 12d
prometheus-kube-prometheus-alertmanager 12d
prometheus-kube-prometheus-apiserver 12d
prometheus-kube-prometheus-coredns 12d
prometheus-kube-prometheus-kube-controller-manager 12d
prometheus-kube-prometheus-kube-etcd 12d
prometheus-kube-prometheus-kube-proxy 12d
prometheus-kube-prometheus-kube-scheduler 12d
prometheus-kube-prometheus-kubelet 12d
prometheus-kube-prometheus-operator 12d
prometheus-kube-prometheus-prometheus 12d
prometheus-kube-state-metrics 12d
prometheus-prometheus-node-exporter 12d
pushgateway-prometheus-pushgateway 2m39s
# Forward local port 9090 to port 9090 of the Prometheus service in the
# monitoring namespace.
kubectl port-forward -n monitoring \
  svc/prometheus-kube-prometheus-prometheus 9090:9090
http://localhost:9090/targets 에서 pushgateway 확인
`up` 내장 metric 을 통해 점검
`up` metric: Prometheus가 scrape 타겟 상태를 감시하기 위해 자동 생성하는 내장 metric
up{job="pushgateway-prometheus-pushgateway"}
label 검색으로 `up` 내장 metric 으로 scrape 되고 있는지 점검
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
def push_ml_metrics_to_gateway(
    pushgateway_url: str,
    job_name: str,
    model: str,
    stage: str,
    accuracy: float,
    precision: float
):
    """Push accuracy and precision gauges for one model/stage to a Pushgateway.

    Two gauges, ``mlflow_accuracy`` and ``mlflow_precision``, are registered in
    a fresh ``CollectorRegistry``. Each is labelled with ``model`` and
    ``stage``, set to the given score, and the registry is then pushed.

    Args:
        pushgateway_url: Address of the Pushgateway, passed straight to
            ``push_to_gateway``.
        job_name: Value used as the ``job`` argument of the push.
        model: Value of the ``model`` label on both gauges.
        stage: Value of the ``stage`` label on both gauges.
        accuracy: Value stored in the ``mlflow_accuracy`` gauge.
        precision: Value stored in the ``mlflow_precision`` gauge.
    """
    # A dedicated registry keeps the push limited to the gauges created here.
    registry = CollectorRegistry()

    label_names = ['model', 'stage']
    # (metric name, help text, value) for each gauge, in registration order.
    scores = (
        ('mlflow_accuracy', 'MLflow Accuracy Score', accuracy),
        ('mlflow_precision', 'MLflow Precision Score', precision),
    )
    for metric_name, help_text, value in scores:
        gauge = Gauge(metric_name, help_text, label_names, registry=registry)
        gauge.labels(model=model, stage=stage).set(value)

    # Send everything collected in the registry to the gateway under job_name.
    push_to_gateway(pushgateway_url, job=job_name, registry=registry)
# Example run: push one set of training scores, then report completion.
push_ml_metrics_to_gateway(
    # In-cluster address, kept for reference.
    # NOTE(review): confirm the Service name matches the installed release.
    # pushgateway_url='pushgateway.monitoring.svc.cluster.local:9091',
    # NOTE(review): assumes the Pushgateway is reachable on local port 9091
    # (e.g. through a port-forward) — confirm.
    pushgateway_url='localhost:9091',
    job_name='mlflow_training',
    model='member_classification',
    stage='train',
    accuracy=0.97,
    precision=0.87
)
print('done')