|
|
|
|
@ -1,97 +1,125 @@
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"flag"
|
|
|
|
|
"fmt"
|
|
|
|
|
"log"
|
|
|
|
|
"math/rand"
|
|
|
|
|
"net/http"
|
|
|
|
|
"strings"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"github.com/gorilla/mux"
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
|
|
|
"net/http"
|
|
|
|
|
"sync"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// ClusterManager carries the state needed to export metrics for one zone.
//
// The embedded mutex is taken by Collect for the duration of a collection.
type ClusterManager struct {
	sync.Mutex

	// Zone is passed as the namespace when fully-qualified metric names are
	// built.
	Zone string

	// metricMapCounters holds the counter metrics; the map values are the
	// names announced by Describe.
	metricMapCounters map[string]string

	// metricMapGauges holds the gauge metrics; the map values are the names
	// announced by Describe.
	metricMapGauges map[string]string
}
|
|
|
|
|
// addr is the address the HTTP server listens on. It is set with the
// -listen-address flag and defaults to ":8080".
var addr = flag.String("listen-address", ":8080", "The address to listen on for HTTP requests.")
|
|
|
|
|
|
|
|
|
|
// Simulate prepare the data
|
|
|
|
|
func (c *ClusterManager) ReallyExpensiveAssessmentOfTheSystemState() (
|
|
|
|
|
metrics map[string]float64,
|
|
|
|
|
) {
|
|
|
|
|
metrics = map[string]float64{
|
|
|
|
|
"oom_crashes_total": 42.00,
|
|
|
|
|
"ram_usage": 6.023e23,
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
var (
|
|
|
|
|
requestTimes = prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
|
|
|
Name: "request_times",
|
|
|
|
|
Help: "请求次数",
|
|
|
|
|
}, []string{"method", "url", "host"})
|
|
|
|
|
// With several labels, the values passed to requestTimes.WithLabelValues(strings.ToLower(r.Method), r.URL.String(), r.Host) must match the label names one-to-one, in order.
|
|
|
|
|
// The result can be queried in PromQL as request_times{method="get",url="/metrics",host="172.20.16.185:8080"}.
|
|
|
|
|
|
|
|
|
|
// 通过NewClusterManager方法创建结构体及对应的指标信息,代码如下所示。
|
|
|
|
|
// NewClusterManager creates the two Descs OOMCountDesc and RAMUsageDesc. Note
|
|
|
|
|
// that the zone is set as a ConstLabel. (It's different in each instance of the
|
|
|
|
|
// ClusterManager, but constant over the lifetime of an instance.) Then there is
|
|
|
|
|
// a variable label "host", since we want to partition the collected metrics by
|
|
|
|
|
// host. Since all Descs created in this way are consistent across instances,
|
|
|
|
|
// with a guaranteed distinction by the "zone" label, we can register different
|
|
|
|
|
// ClusterManager instances with the same registry.
|
|
|
|
|
func NewClusterManager(zone string) *ClusterManager {
|
|
|
|
|
return &ClusterManager{
|
|
|
|
|
Zone: zone,
|
|
|
|
|
metricMapGauges: map[string]string{
|
|
|
|
|
"ram_usage": "ram_usage_bytes",
|
|
|
|
|
},
|
|
|
|
|
metricMapCounters: map[string]string{
|
|
|
|
|
"oom_crashes": "oom_crashes_total",
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
requestTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
|
|
|
Name: "request_time",
|
|
|
|
|
Help: "每次请求花费的时间",
|
|
|
|
|
}, []string{"method", "url", "host"})
|
|
|
|
|
|
|
|
|
|
// First, a collector must implement the prometheus.Collector interface, i.e. both the Describe and the Collect method. The implementation is shown below.
|
|
|
|
|
// Describe simply sends the two Descs in the struct to the channel.
|
|
|
|
|
// The Prometheus registry calls Collect to gather the metrics.
|
|
|
|
|
// Collect sends the gathered data to the channel and then returns.
|
|
|
|
|
// The metrics collected are the ones announced by Describe. Collection may run concurrently, so it must be safe for concurrent use.
|
|
|
|
|
// A Summary has to compute its quantiles, so it is expensive.
|
|
|
|
|
requestSummary = prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
|
|
|
|
Name: "request_time_summary",
|
|
|
|
|
Help: "summary test",
|
|
|
|
|
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
|
|
|
|
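// 0.5, 0.9 and 0.99 are the quantiles; 0.05, 0.01 and 0.001 are the tolerated errors. For example, a reported 0.5-quantile of 10 may really be the value at any rank between 0.45 and 0.55. Asking for too much precision puts heavy load on the CPU.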
|
|
|
|
|
}, []string{"request_time"})
|
|
|
|
|
|
|
|
|
|
func (c *ClusterManager) Describe(ch chan<- *prometheus.Desc) {
|
|
|
|
|
// prometheus.NewDesc(prometheus.BuildFQName(namespace, "", metricName), docString, labels, nil)
|
|
|
|
|
for _, v := range c.metricMapGauges {
|
|
|
|
|
ch <- prometheus.NewDesc(prometheus.BuildFQName(c.Zone, "", v), v, nil, nil)
|
|
|
|
|
}
|
|
|
|
|
// A Histogram only counts observations and computes nothing, so it is cheap.
|
|
|
|
|
// Linear buckets.
|
|
|
|
|
requestHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
|
|
|
Name: "request_time_histogram",
|
|
|
|
|
Help: "histogram LinearBuckets test",
|
|
|
|
|
Buckets: prometheus.LinearBuckets(1, 2, 20), // 初始为1,间隔为2,一共20个取值,Buckets会隐式添加 `+Inf` 值作为取值区间的最大值
|
|
|
|
|
// request_time_histogram_bucket{instance="172.20.16.185:8080", job="my_metrics", le="1.0", request_time="request_time"}
|
|
|
|
|
// request_time_histogram_bucket{instance="172.20.16.185:8080", job="my_metrics", le="3.0", request_time="request_time"}
|
|
|
|
|
// ...
|
|
|
|
|
// Series like the ones above are generated; le means "less than or equal".
|
|
|
|
|
// For example, the first series counts the observations that are <= 1.0, i.e. here the number of requests whose response time was at most 1.0.
|
|
|
|
|
}, []string{"request_time"})
|
|
|
|
|
|
|
|
|
|
for _, v := range c.metricMapCounters {
|
|
|
|
|
ch <- prometheus.NewDesc(prometheus.BuildFQName(c.Zone, "", v), v, nil, nil)
|
|
|
|
|
}
|
|
|
|
|
// Default buckets.
|
|
|
|
|
requestHistogramDef = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
|
|
|
Name: "request_time_histogram_def_buckets",
|
|
|
|
|
Help: "histogram DefBuckets test",
|
|
|
|
|
Buckets: prometheus.DefBuckets,
|
|
|
|
|
}, []string{"request_time"})
|
|
|
|
|
|
|
|
|
|
// Exponential buckets.
|
|
|
|
|
requestHistogramExponentialBuckets = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
|
|
|
Name: "request_time_histogram_exponential_buckets",
|
|
|
|
|
Help: "histogram ExponentialBuckets test",
|
|
|
|
|
Buckets: prometheus.ExponentialBuckets(1, 2, 20), // 初始值为1,指数为2增长,20个取值
|
|
|
|
|
}, []string{"request_time"})
|
|
|
|
|
// Which bucket layout to use depends on the concrete use case.
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
// Add Go module build info.
|
|
|
|
|
// 也可prometheus.MustRegister(xx, yy)
|
|
|
|
|
prometheus.MustRegister(requestTime)
|
|
|
|
|
prometheus.MustRegister(requestTimes)
|
|
|
|
|
prometheus.MustRegister(requestSummary)
|
|
|
|
|
prometheus.MustRegister(requestHistogram, requestHistogramDef, requestHistogramExponentialBuckets)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Collect方法是核心,它会抓取你需要的所有数据,根据需求对其进行分析,然后将指标发送回客户端库。
|
|
|
|
|
// 用于传递所有可能指标的定义描述符
|
|
|
|
|
// 可以在程序运行期间添加新的描述,收集新的指标信息
|
|
|
|
|
// 重复的描述符将被忽略。两个不同的Collector不要设置相同的描述符
|
|
|
|
|
func (c *ClusterManager) Collect(ch chan<- prometheus.Metric) {
|
|
|
|
|
c.Lock()
|
|
|
|
|
defer c.Unlock()
|
|
|
|
|
m := c.ReallyExpensiveAssessmentOfTheSystemState()
|
|
|
|
|
for k, v := range m {
|
|
|
|
|
t := prometheus.GaugeValue
|
|
|
|
|
if c.metricMapCounters[k] != "" {
|
|
|
|
|
t = prometheus.CounterValue
|
|
|
|
|
}
|
|
|
|
|
c.registerConstMetric(ch, k, v, t)
|
|
|
|
|
}
|
|
|
|
|
func main() {
|
|
|
|
|
flag.Parse()
|
|
|
|
|
r := mux.NewRouter()
|
|
|
|
|
r.HandleFunc("/", handler)
|
|
|
|
|
r.Use(metricsMiddleware)
|
|
|
|
|
|
|
|
|
|
r.Handle("/metrics", promhttp.HandlerFor(
|
|
|
|
|
prometheus.DefaultGatherer,
|
|
|
|
|
promhttp.HandlerOpts{
|
|
|
|
|
// Opt into OpenMetrics to support exemplars.
|
|
|
|
|
EnableOpenMetrics: true,
|
|
|
|
|
},
|
|
|
|
|
))
|
|
|
|
|
log.Fatal(http.ListenAndServe(*addr, r))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 用于传递所有可能指标的定义描述符给指标
|
|
|
|
|
func (c *ClusterManager) registerConstMetric(ch chan<- prometheus.Metric, metric string, val float64, valType prometheus.ValueType, labelValues ...string) {
|
|
|
|
|
descr := prometheus.NewDesc(prometheus.BuildFQName(c.Zone, "", metric), metric, nil, nil)
|
|
|
|
|
if m, err := prometheus.NewConstMetric(descr, valType, val, labelValues...); err == nil {
|
|
|
|
|
ch <- m
|
|
|
|
|
}
|
|
|
|
|
func metricsMiddleware(next http.Handler) http.Handler {
|
|
|
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
|
|
|
start := time.Now().Unix()
|
|
|
|
|
next.ServeHTTP(w, r)
|
|
|
|
|
end := time.Now().Unix()
|
|
|
|
|
requestTime.WithLabelValues(strings.ToLower(r.Method), r.URL.String(), r.Host).Set(float64(end - start))
|
|
|
|
|
requestTimes.WithLabelValues(strings.ToLower(r.Method), r.URL.String(), r.Host).Add(1)
|
|
|
|
|
requestSummary.WithLabelValues("request_time").Observe(float64(end - start))
|
|
|
|
|
requestHistogram.WithLabelValues("request_time").Observe(float64(end - start))
|
|
|
|
|
requestHistogramDef.WithLabelValues("request_time").Observe(float64(end - start))
|
|
|
|
|
requestHistogramExponentialBuckets.WithLabelValues("request_time").Observe(float64(end - start))
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
workerCA := NewClusterManager("xiaodian")
|
|
|
|
|
reg := prometheus.NewPedanticRegistry()
|
|
|
|
|
reg.MustRegister(workerCA)
|
|
|
|
|
//当promhttp.Handler()被执行时,所有metric被序列化输出。题外话,其实输出的格式既可以是plain text,也可以是protocol Buffers。
|
|
|
|
|
http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
|
|
|
|
|
http.ListenAndServe(":9100", nil)
|
|
|
|
|
// handler simulates a slow endpoint: it sleeps for a random 0, 1 or 2
// seconds, writes a fixed response body and prints a line to standard output.
func handler(w http.ResponseWriter, r *http.Request) {
	delay := time.Duration(rand.Intn(3)) * time.Second
	time.Sleep(delay)

	body := []byte("handler response")
	_, _ = w.Write(body) // the write result is deliberately discarded
	fmt.Println("print handler")
}
|
|
|
|
|
|
|
|
|
|
// 清除数据,各项数据都会清零
|
|
|
|
|
func dataClear() {
|
|
|
|
|
requestTime.Reset()
|
|
|
|
|
requestTimes.Reset()
|
|
|
|
|
requestHistogram.Reset()
|
|
|
|
|
requestHistogramExponentialBuckets.Reset()
|
|
|
|
|
requestHistogramDef.Reset()
|
|
|
|
|
requestSummary.Reset()
|
|
|
|
|
}
|
|
|
|
|
|