Commit d768cea2 by tingweiwang

更新k8s yaml

1 parent f20cebf0
Showing with 1309 additions and 128 deletions
......@@ -60,8 +60,8 @@ spec:
cpu: "200m"
memory: "512Mi"
limits:
cpu: "1000m"
memory: "1024Mi"
cpu: "2000m"
memory: "3000Mi"
volumes:
- name: adl-volume
persistentVolumeClaim:
......
wapiVersion: v1
apiVersion: v1
kind: Service
metadata:
name: kpl--frontend
......
......@@ -48,8 +48,8 @@ spec:
cpu: "200m"
memory: "512Mi"
limits:
cpu: "1000m"
memory: "1024Mi"
cpu: "4000m"
memory: "4096Mi"
volumes:
- name: kpl-volume
persistentVolumeClaim:
......
......@@ -12,3 +12,7 @@ spec:
name: port-80
targetPort: 80
nodePort: 30180
- port: 81
name: port-81
targetPort: 81
nodePort: 30181
......@@ -53,9 +53,6 @@ spec:
- key: apiserver.conf
path: default.conf
- key: nginx.conf
path: nginx.conf
path: nginx.conf
- key: apiserver-edu.conf
path: default-edu.conf
......@@ -29,16 +29,21 @@ spec:
ports:
- containerPort: 9999
name: port-9999
- containerPort: 8080
name: wport-8080
livenessProbe:
tcpSocket:
port: port-9999
initialDelaySeconds: 10
httpGet:
path: /health
port: wport-8080
scheme: HTTP
resources:
requests:
cpu: "300m"
memory: "300Mi"
limits:
cpu: "1000m"
memory: "1024Mi"
cpu: "2000m"
memory: "2048Mi"
volumes:
- name: config
configMap:
......
......@@ -36,6 +36,8 @@ spec:
ports:
- containerPort: 8080
name: wport-8080
- containerPort: 8555
name: wport-8555
livenessProbe:
initialDelaySeconds: 10
httpGet:
......
# kpl-launcher部署文档
作者:刘弘也
## 依赖安装
本服务包含如下依赖
* Kubernetes 1.15+,支持CRD
* volcano v0.4.0
其中volcano可以使用本项目提供的部署脚本`k8s/volcano/*.yaml`进行安装,与官方版本的差异主要是把其镜像搬运到了金山云镜像仓库,以加快部署速度:
* 安装volcano:`kubectl apply -f k8s/volcano`
* 检查`volcano-system`命名空间及其内部的Pod是否全部成功启动,如下所示:
```shell
$ kubectl get pods -n volcano-system
NAME READY STATUS RESTARTS AGE
volcano-admission-7b498d4d56-7djqg 1/1 Running 0 4d23h
volcano-admission-init-7sqzw 0/1 Completed 0 4d23h
volcano-controllers-68d55f9444-sq4vt 1/1 Running 0 4d23h
volcano-scheduler-7cc766767b-hvbvb 1/1 Running 0 4d23h
```
## kpl-launcher安装
目前kpl-launcher服务进行部署的过程包含如下内容
* (事先准备好的)命名空间:"kpl"
* `k8s/kpl-launcher/rbac.yaml`
* ServiceAccount:"kpl-launcher"
* ClusterRole:"kpl-launcher"
* ClusterRoleBinding:"kpl-launcher"
* `k8s/kpl-launcher/deployment.yaml`
* Deployment:"kpl-launcher"
* 服务启动命令的可选参数
```shell
$ kpl_launcher --help
Usage of ./build/bin/kpl_launcher:
-address string
service listening address (default "[::]")
-port int
service listening port (default 8000)
-private-key string
private key for ssl/tls secured service
-cert-chain string
certificate chain for ssl/tls secured service
-incluster
if use incluster config
-local-config string
(optional) absolute path to the kubeconfig file (default "~/.kube/config")
```
* Service (默认为ClusterIP类型):"kpl-launcher-service"
具体部署步骤如下:
* 联系相关开发负责人(刘弘也)确认当前要部署的镜像版本,如:`hub.kce.ksyun.com/aivc-kpl/kpl-launcher:launcher-9efebf5`
* 修改`k8s/kpl-launcher/deployment.yaml`中的部署镜像:
```shell
export IMAGE_NAME=hub.kce.ksyun.com/aivc-kpl/kpl-launcher:launcher-f2d3958
sed -i "s/image: .*/image: ${IMAGE_NAME}/" k8s/kpl-launcher/deployment.yaml
```
* 创建命名空间kpl
```shell
kubectl create ns kpl
```
* 准备好ssl/tls的证书文件
* 使用openssl生成自签名证书(注意指定CN):
```shell
mkdir certs
# openssl req -newkey rsa:2048 -nodes -keyout certs/server.key -x509 -days 3650 -out certs/server.crt -subj "/CN=KPL"
openssl ecparam -genkey -name secp384r1 -out server.key
openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650
```
* 生成一个叫kpl-ssl的ConfigMap,其包含刚才生成的两个ssl证书文件:
```shell
kubectl -n kpl create configmap kpl-ssl --from-file=./certs
```
* 注意保留`certs`里的证书文件,客户端程序也需要使用。
* 安装kpl-launcher
```shell
kubectl apply -f k8s/kpl-launcher/rbac.yaml
kubectl apply -f k8s/kpl-launcher/deployment.yaml
```
* 检查kpl-launcher服务是否已经启动
```shell
$ kubectl logs -n kpl kpl-launcher-5b9b6d74bc-swhmm
I0526 13:01:17.015731 6 main.go:82] Start in secured mode ...
I0526 13:01:17.020704 6 launcher.go:44] new launcher with the following backends:
I0526 13:01:17.020714 6 launcher.go:46] volcano: &{0xc00036b680 0xc000402940 map[]}
I0526 13:01:17.020727 6 launcher.go:46] simple_job: &{0xc00036b680 map[]}
I0526 13:01:17.020742 6 main.go:96] try to start launcher server at [::]:8000
```
#!/bin/bash
# Author: wangtingwei
# Redeploys volcano and kpl-launcher.
# NOTE: the k8s_yaml directory differs between environments, so this script
# must be run from the matching project directory; k8s_yaml is deliberately
# a relative path (do not hard-code an absolute one).

# Full reference of the latest launcher image (registry/namespace/name:tag).
IMAGE_NAME=$(cat image_list.txt)
# Keep only the trailing "name:tag" part, for rewriting to the private registry.
PRI_IMAGE_NAME=${IMAGE_NAME##*/}
# Timestamp making the /tmp working copies unique per run.
timestamp=$(date +%F-%H-%M)
####################################################################
# Ensure the volcano-system namespace exists before any apply/delete.
if kubectl get ns | grep -q volcano-system; then
    echo "volcano-system namespace already exists, continue operation"
else
    echo "namespace not found, autocreate namespace volcano-system" && kubectl create namespace volcano-system
fi
####################################################################
# Public-cloud flavor: copy the deployment template to /tmp and replace the
# IMAGE_NAME placeholder with the full public-registry image reference.
sed_image_name () {
    echo "拷贝yaml到临时目录,公有云环境下sed修改镜像"
    sleep 2
    cp -a k8s_yaml/kpl-launcher/deployment.yaml "/tmp/deployment-${timestamp}.yaml"
    sed -i "s@IMAGE_NAME@${IMAGE_NAME}@g" "/tmp/deployment-${timestamp}.yaml"
}
####################################################################
# Private-registry flavor: sed runs on copies in /tmp so the checked-in
# templates are never modified (no need to revert after a bad sed).
private_sed_image_name () {
    cp -a k8s_yaml/volcano/volcano-development.yaml "/tmp/volcano-development-${timestamp}.yaml"
    cp -a k8s_yaml/kpl-launcher/deployment.yaml "/tmp/deployment-${timestamp}.yaml"
    # Point the volcano images at the private harbor registry.
    sed -i "s@hub.kce.ksyun.com/aivc/volcanosh/vc-scheduler:latest@harbor_host/k8s/vc-scheduler:latest@g" "/tmp/volcano-development-${timestamp}.yaml"
    sed -i "s@hub.kce.ksyun.com/aivc/volcanosh/vc-webhook-manager:latest@harbor_host/k8s/vc-webhook-manager:latest@g" "/tmp/volcano-development-${timestamp}.yaml"
    sed -i "s@hub.kce.ksyun.com/aivc/volcanosh/vc-controller-manager:latest@harbor_host/k8s/vc-controller-manager:latest@g" "/tmp/volcano-development-${timestamp}.yaml"
    sed -i "s@IMAGE_NAME@harbor_host/k8s/${PRI_IMAGE_NAME}@g" "/tmp/deployment-${timestamp}.yaml"
}
######################################################################
# Best-effort teardown of the previous deployment; individual delete
# failures (already-gone resources) are intentionally ignored.
delete_server () {
    kubectl delete -f k8s_yaml/kpl-ssl-configmap.yaml
    kubectl delete -f k8s_yaml/kpl-ssl-configmap-autodl.yaml
    kubectl delete -f k8s_yaml/volcano/ && sleep 2
    kubectl delete -f k8s_yaml/kpl-launcher/ && sleep 2
    kubectl delete secrets -n volcano-system volcano-admission-secret
}
# Create volcano and kpl-launcher (public-cloud variant); the launcher
# deployment is applied from the sed-patched copy under /tmp.
create_server () {
    kubectl apply -f k8s_yaml/kpl-ssl-configmap.yaml
    kubectl apply -f k8s_yaml/kpl-ssl-configmap-autodl.yaml
    kubectl apply -f k8s_yaml/kpl-launcher/rbac.yaml
    kubectl apply -f k8s_yaml/volcano/
    kubectl apply -f "/tmp/deployment-${timestamp}.yaml" && sleep 3
}
# Create volcano and kpl-launcher (private-registry variant); both manifests
# are applied from the sed-patched copies under /tmp.
private_create_server () {
    kubectl apply -f k8s_yaml/kpl-ssl-configmap.yaml
    kubectl apply -f k8s_yaml/kpl-ssl-configmap-autodl.yaml
    kubectl apply -f k8s_yaml/kpl-launcher/rbac.yaml
    kubectl apply -f "/tmp/volcano-development-${timestamp}.yaml" && sleep 3
    kubectl apply -f "/tmp/deployment-${timestamp}.yaml" && sleep 3
}
# Tear down, patch the image, redeploy (public cloud).
redeploy_all () {
    delete_server
    sed_image_name
    create_server
}
# Tear down, patch images for the private registry, redeploy.
private_deploy () {
    delete_server
    private_sed_image_name
    private_create_server
}
case "${1:-}" in
    redeploy_all)
        redeploy_all
        ;;
    private_deploy)
        private_deploy
        ;;
    *)
        echo "please input (redeploy_all or private_deploy)"
        ;;
esac
hub.kce.ksyun.com/aivc-kpl/kpl-launcher:launcher-854c5ac
\ No newline at end of file
......@@ -18,7 +18,7 @@ spec:
serviceAccount: kpl-launcher
containers:
- name: launcher
image: hub.kce.ksyun.com/aivc-kpl/kpl-launcher:launcher-d8597db
image: IMAGE_NAME #镜像仓库以及名字变量模板
command:
- /bin/bash
- -c
......
# kpl-launcher部署文档
作者:刘弘也
## 依赖安装
本服务包含如下依赖
* Kubernetes 1.15+,支持CRD
* volcano v0.4.0
其中volcano可以使用本项目提供的部署脚本`k8s/volcano/*.yaml`进行安装,与官方版本的差异主要是把其镜像搬运到了金山云镜像仓库,以加快部署速度:
* 安装volcano:`kubectl apply -f k8s/volcano`
* 检查`volcano-system`命名空间及其内部的Pod是否全部成功启动,如下所示:
```shell
$ kubectl get pods -n volcano-system
NAME READY STATUS RESTARTS AGE
volcano-admission-7b498d4d56-7djqg 1/1 Running 0 4d23h
volcano-admission-init-7sqzw 0/1 Completed 0 4d23h
volcano-controllers-68d55f9444-sq4vt 1/1 Running 0 4d23h
volcano-scheduler-7cc766767b-hvbvb 1/1 Running 0 4d23h
```
## kpl-launcher安装
目前kpl-launcher服务进行部署的过程包含如下内容
* (事先准备好的)命名空间:"kpl"
* `k8s/kpl-launcher/rbac.yaml`
* ServiceAccount:"kpl-launcher"
* ClusterRole:"kpl-launcher"
* ClusterRoleBinding:"kpl-launcher"
* `k8s/kpl-launcher/deployment.yaml`
* Deployment:"kpl-launcher"
* 服务启动命令的可选参数
```shell
$ kpl_launcher --help
Usage of ./build/bin/kpl_launcher:
-address string
service listening address (default "[::]")
-port int
service listening port (default 8000)
-private-key string
private key for ssl/tls secured service
-cert-chain string
certificate chain for ssl/tls secured service
-incluster
if use incluster config
-local-config string
(optional) absolute path to the kubeconfig file (default "~/.kube/config")
```
* Service (默认为ClusterIP类型):"kpl-launcher-service"
具体部署步骤如下:
* 联系相关开发负责人(刘弘也)确认当前要部署的镜像版本,如:`hub.kce.ksyun.com/aivc-kpl/kpl-launcher:launcher-9efebf5`
* 修改`k8s/kpl-launcher/deployment.yaml`中的部署镜像:
```shell
export IMAGE_NAME=hub.kce.ksyun.com/aivc-kpl/kpl-launcher:launcher-f2d3958
sed -i "s/image: .*/image: ${IMAGE_NAME}/" k8s/kpl-launcher/deployment.yaml
```
* 创建命名空间kpl
```shell
kubectl create ns kpl
```
* 准备好ssl/tls的证书文件
* 使用openssl生成自签名证书(注意指定CN):
```shell
mkdir certs
# openssl req -newkey rsa:2048 -nodes -keyout certs/server.key -x509 -days 3650 -out certs/server.crt -subj "/CN=KPL"
openssl ecparam -genkey -name secp384r1 -out server.key
openssl req -new -x509 -sha256 -key server.key -out server.crt -days 3650
```
* 生成一个叫kpl-ssl的ConfigMap,其包含刚才生成的两个ssl证书文件:
```shell
kubectl -n kpl create configmap kpl-ssl --from-file=./certs
```
* 注意保留`certs`里的证书文件,客户端程序也需要使用。
* 安装kpl-launcher
```shell
kubectl apply -f k8s/kpl-launcher/rbac.yaml
kubectl apply -f k8s/kpl-launcher/deployment.yaml
```
* 检查kpl-launcher服务是否已经启动
```shell
$ kubectl logs -n kpl kpl-launcher-5b9b6d74bc-swhmm
I0526 13:01:17.015731 6 main.go:82] Start in secured mode ...
I0526 13:01:17.020704 6 launcher.go:44] new launcher with the following backends:
I0526 13:01:17.020714 6 launcher.go:46] volcano: &{0xc00036b680 0xc000402940 map[]}
I0526 13:01:17.020727 6 launcher.go:46] simple_job: &{0xc00036b680 map[]}
I0526 13:01:17.020742 6 main.go:96] try to start launcher server at [::]:8000
```
#!/bin/bash
# Author: wangtingwei
# Redeploys volcano and kpl-launcher.
# NOTE: the k8s_yaml directory differs between environments, so this script
# must be run from the matching project directory; k8s_yaml is deliberately
# a relative path (do not hard-code an absolute one).

# Full reference of the latest launcher image (registry/namespace/name:tag).
IMAGE_NAME=$(cat image_list.txt)
# Keep only the trailing "name:tag" part, for rewriting to the private registry.
PRI_IMAGE_NAME=${IMAGE_NAME##*/}
# Timestamp making the /tmp working copies unique per run.
timestamp=$(date +%F-%H-%M)
####################################################################
# Ensure the volcano-system namespace exists before any apply/delete.
if kubectl get ns | grep -q volcano-system; then
    echo "volcano-system namespace already exists, continue operation"
else
    echo "namespace not found, autocreate namespace volcano-system" && kubectl create namespace volcano-system
fi
####################################################################
# Public-cloud flavor: copy the deployment template to /tmp and replace the
# IMAGE_NAME placeholder with the full public-registry image reference.
sed_image_name () {
    echo "拷贝yaml到临时目录,公有云环境下sed修改镜像"
    sleep 2
    cp -a k8s_yaml/kpl-launcher/deployment.yaml "/tmp/deployment-${timestamp}.yaml"
    sed -i "s@IMAGE_NAME@${IMAGE_NAME}@g" "/tmp/deployment-${timestamp}.yaml"
}
####################################################################
# Private-registry flavor: sed runs on copies in /tmp so the checked-in
# templates are never modified (no need to revert after a bad sed).
private_sed_image_name () {
    cp -a k8s_yaml/volcano/volcano-development.yaml "/tmp/volcano-development-${timestamp}.yaml"
    cp -a k8s_yaml/kpl-launcher/deployment.yaml "/tmp/deployment-${timestamp}.yaml"
    # Point the volcano images at the private harbor registry.
    sed -i "s@hub.kce.ksyun.com/aivc/volcanosh/vc-scheduler:latest@harbor_host/k8s/vc-scheduler:latest@g" "/tmp/volcano-development-${timestamp}.yaml"
    sed -i "s@hub.kce.ksyun.com/aivc/volcanosh/vc-webhook-manager:latest@harbor_host/k8s/vc-webhook-manager:latest@g" "/tmp/volcano-development-${timestamp}.yaml"
    sed -i "s@hub.kce.ksyun.com/aivc/volcanosh/vc-controller-manager:latest@harbor_host/k8s/vc-controller-manager:latest@g" "/tmp/volcano-development-${timestamp}.yaml"
    sed -i "s@IMAGE_NAME@harbor_host/k8s/${PRI_IMAGE_NAME}@g" "/tmp/deployment-${timestamp}.yaml"
}
######################################################################
# Best-effort teardown of the previous deployment; individual delete
# failures (already-gone resources) are intentionally ignored.
delete_server () {
    kubectl delete -f k8s_yaml/kpl-ssl-configmap.yaml
    kubectl delete -f k8s_yaml/kpl-ssl-configmap-autodl.yaml
    kubectl delete -f k8s_yaml/volcano/ && sleep 2
    kubectl delete -f k8s_yaml/kpl-launcher/ && sleep 2
    kubectl delete secrets -n volcano-system volcano-admission-secret
}
# Create volcano and kpl-launcher (public-cloud variant); the launcher
# deployment is applied from the sed-patched copy under /tmp.
create_server () {
    kubectl apply -f k8s_yaml/kpl-ssl-configmap.yaml
    kubectl apply -f k8s_yaml/kpl-ssl-configmap-autodl.yaml
    kubectl apply -f k8s_yaml/kpl-launcher/rbac.yaml
    kubectl apply -f k8s_yaml/volcano/
    kubectl apply -f "/tmp/deployment-${timestamp}.yaml" && sleep 3
}
# Create volcano and kpl-launcher (private-registry variant); both manifests
# are applied from the sed-patched copies under /tmp.
private_create_server () {
    kubectl apply -f k8s_yaml/kpl-ssl-configmap.yaml
    kubectl apply -f k8s_yaml/kpl-ssl-configmap-autodl.yaml
    kubectl apply -f k8s_yaml/kpl-launcher/rbac.yaml
    kubectl apply -f "/tmp/volcano-development-${timestamp}.yaml" && sleep 3
    kubectl apply -f "/tmp/deployment-${timestamp}.yaml" && sleep 3
}
# Tear down, patch the image, redeploy (public cloud).
redeploy_all () {
    delete_server
    sed_image_name
    create_server
}
# Tear down, patch images for the private registry, redeploy.
private_deploy () {
    delete_server
    private_sed_image_name
    private_create_server
}
case "${1:-}" in
    redeploy_all)
        redeploy_all
        ;;
    private_deploy)
        private_deploy
        ;;
    *)
        echo "please input (redeploy_all or private_deploy)"
        ;;
esac
hub.kce.ksyun.com/aivc-kpl/kpl-launcher:launcher-854c5ac
\ No newline at end of file
-----BEGIN CERTIFICATE-----
MIICKTCCAbCgAwIBAgIJAOEzff/TB45/MAoGCCqGSM49BAMCMFMxCzAJBgNVBAYT
AkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRn
aXRzIFB0eSBMdGQxDDAKBgNVBAMMA0tQTDAeFw0yMDA2MDMwMzA2MDRaFw0zMDA2
MDEwMzA2MDRaMFMxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQxDDAKBgNVBAMMA0tQTDB2
MBAGByqGSM49AgEGBSuBBAAiA2IABDUlvu7GTOB4kYiqBuRqiLU3chqccZhMFL16
olmMX31M8EWA0VXj5JeMo4js7NcuBRfFp2JIdhqOroodz+Bu64nmhjbr7Qkglk14
XguoUrwycTAlba2JdBpXRXwY5uP7eqNQME4wHQYDVR0OBBYEFPQ81JjaE8UG4FyX
Hjo09H9dRkcEMB8GA1UdIwQYMBaAFPQ81JjaE8UG4FyXHjo09H9dRkcEMAwGA1Ud
EwQFMAMBAf8wCgYIKoZIzj0EAwIDZwAwZAIwSCzsAdwv5fJOlAMI6W+0s5whygR3
VQEq88EffPmjQ8Cn6rqWFzev4Cd5W18Qput9AjAjoBh5WdlK1N0sIZpRLaCYK7El
2vab3X1CbV8MkwGJU7Vnjav+w185kSNpbpF6idw=
-----END CERTIFICATE-----
-----BEGIN EC PARAMETERS-----
BgUrgQQAIg==
-----END EC PARAMETERS-----
-----BEGIN EC PRIVATE KEY-----
MIGkAgEBBDCc8hpwAUrmEZUnFeD4Fi/OnMT2fAXVtJ50FIR/HCWMD/pPDV1uKLZI
Hm6h6fRQX82gBwYFK4EEACKhZANiAAQ1Jb7uxkzgeJGIqgbkaoi1N3IanHGYTBS9
eqJZjF99TPBFgNFV4+SXjKOI7OzXLgUXxadiSHYajq6KHc/gbuuJ5oY26+0JIJZN
eF4LqFK8MnEwJW2tiXQaV0V8GObj+3o=
-----END EC PRIVATE KEY-----
# kpl-launcher Deployment + ClusterIP Service (namespace: kpl).
kind: Deployment
apiVersion: apps/v1
metadata:
  name: kpl-launcher
  namespace: kpl
  labels:
    app: kpl-launcher
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kpl-launcher
  template:
    metadata:
      labels:
        app: kpl-launcher
    spec:
      # serviceAccountName replaces the deprecated `serviceAccount` alias.
      serviceAccountName: kpl-launcher
      containers:
      - name: launcher
        # IMAGE_NAME is a placeholder substituted by the deploy script via sed.
        image: IMAGE_NAME
        command:
        - /bin/bash
        - -c
        - kpl_launcher --incluster --private-key /etc/kpl/ssl/server.key --cert-chain /etc/kpl/ssl/server.crt --port 8000 2>&1
        ports:
        - containerPort: 8000
          name: launcher-port
        imagePullPolicy: "IfNotPresent"
        resources:
          # Requests added to lower the scheduling requirements.
          # NOTE(review): limits.memory == requests.memory == 100Mi, so the pod
          # is OOM-killed as soon as it exceeds 100Mi — confirm this headroom
          # is sufficient. Also, requests.cpu: 1 reserves a full core, which
          # seems at odds with "lower scheduling requirements"; verify.
          requests:
            cpu: 1
            memory: 100Mi
          limits:
            cpu: 8
            memory: 100Mi
        env:
        - name: KPL_IMAGE_SECRET_NAME
          value: kpl-regcred
        volumeMounts:
        - name: kpl-ssl
          mountPath: /etc/kpl/ssl
          readOnly: true
      volumes:
      # TLS cert/key pair served by the launcher, from the kpl-ssl ConfigMap.
      - name: kpl-ssl
        configMap:
          name: kpl-ssl
      imagePullSecrets:
      - name: kpl-regcred
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: kpl-launcher
  name: kpl-launcher-service
  namespace: kpl
spec:
  ports:
  - port: 8000
    protocol: TCP
    targetPort: 8000
  # type: NodePort
  selector:
    app: kpl-launcher
# RBAC for kpl-launcher: ServiceAccount + ClusterRole + binding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kpl-launcher
  namespace: kpl
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: kpl-launcher
rules:
# Discover the volcano Job CRD.
- apiGroups: ["apiextensions.k8s.io"]
  resources: ["customresourcedefinitions"]
  verbs: ["get", "list", "watch"]
# Full lifecycle of volcano jobs launched on behalf of users.
- apiGroups: ["batch.volcano.sh"]
  resources: ["jobs"]
  verbs: ["get", "create", "list", "watch", "update", "delete"]
- apiGroups: [""]
  resources: ["pods", "pods/status"]
  verbs: ["create", "get", "list", "watch", "update", "bind", "updateStatus", "delete"]
# Read-only visibility into cluster storage, workloads and topology.
- apiGroups: [""]
  resources: ["persistentvolumeclaims"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["persistentvolumes"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["services"]
  verbs: ["list", "watch"]
- apiGroups: ["apps"]
  resources: ["deployments"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["namespaces"]
  verbs: ["list", "watch"]
- apiGroups: ["storage.k8s.io"]
  resources: ["storageclasses"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["list", "watch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: kpl-launcher
subjects:
- kind: ServiceAccount
  name: kpl-launcher
  namespace: kpl
roleRef:
  kind: ClusterRole
  name: kpl-launcher
  apiGroup: rbac.authorization.k8s.io
# kpl-ssl ConfigMap for the autodl namespace.
# WARNING(review): this manifest embeds a TLS certificate together with its
# PRIVATE KEY in version control. Keys belong in a Secret generated at deploy
# time (the docs generate it with `kubectl create configmap --from-file`),
# not in the repo — rotate this key and move it to a Secret.
apiVersion: v1
data:
  server.crt: |
    -----BEGIN CERTIFICATE-----
    MIICKTCCAbCgAwIBAgIJAOEzff/TB45/MAoGCCqGSM49BAMCMFMxCzAJBgNVBAYT
    AkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRn
    aXRzIFB0eSBMdGQxDDAKBgNVBAMMA0tQTDAeFw0yMDA2MDMwMzA2MDRaFw0zMDA2
    MDEwMzA2MDRaMFMxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
    HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQxDDAKBgNVBAMMA0tQTDB2
    MBAGByqGSM49AgEGBSuBBAAiA2IABDUlvu7GTOB4kYiqBuRqiLU3chqccZhMFL16
    olmMX31M8EWA0VXj5JeMo4js7NcuBRfFp2JIdhqOroodz+Bu64nmhjbr7Qkglk14
    XguoUrwycTAlba2JdBpXRXwY5uP7eqNQME4wHQYDVR0OBBYEFPQ81JjaE8UG4FyX
    Hjo09H9dRkcEMB8GA1UdIwQYMBaAFPQ81JjaE8UG4FyXHjo09H9dRkcEMAwGA1Ud
    EwQFMAMBAf8wCgYIKoZIzj0EAwIDZwAwZAIwSCzsAdwv5fJOlAMI6W+0s5whygR3
    VQEq88EffPmjQ8Cn6rqWFzev4Cd5W18Qput9AjAjoBh5WdlK1N0sIZpRLaCYK7El
    2vab3X1CbV8MkwGJU7Vnjav+w185kSNpbpF6idw=
    -----END CERTIFICATE-----
  server.key: |
    -----BEGIN EC PARAMETERS-----
    BgUrgQQAIg==
    -----END EC PARAMETERS-----
    -----BEGIN EC PRIVATE KEY-----
    MIGkAgEBBDCc8hpwAUrmEZUnFeD4Fi/OnMT2fAXVtJ50FIR/HCWMD/pPDV1uKLZI
    Hm6h6fRQX82gBwYFK4EEACKhZANiAAQ1Jb7uxkzgeJGIqgbkaoi1N3IanHGYTBS9
    eqJZjF99TPBFgNFV4+SXjKOI7OzXLgUXxadiSHYajq6KHc/gbuuJ5oY26+0JIJZN
    eF4LqFK8MnEwJW2tiXQaV0V8GObj+3o=
    -----END EC PRIVATE KEY-----
kind: ConfigMap
metadata:
  name: kpl-ssl
  namespace: autodl
# kpl-ssl ConfigMap for the kpl namespace.
# NOTE(review): the original was exported from a live cluster and carried
# server-populated metadata (creationTimestamp, resourceVersion, selfLink,
# uid). Those fields must not be checked in — a stale resourceVersion makes
# `kubectl apply` fail with conflict errors — so they are removed here.
# WARNING(review): this manifest embeds a TLS certificate together with its
# PRIVATE KEY in version control. Keys belong in a Secret generated at deploy
# time, not in the repo — rotate this key and move it to a Secret.
apiVersion: v1
data:
  server.crt: |
    -----BEGIN CERTIFICATE-----
    MIICKTCCAbCgAwIBAgIJAOEzff/TB45/MAoGCCqGSM49BAMCMFMxCzAJBgNVBAYT
    AkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRn
    aXRzIFB0eSBMdGQxDDAKBgNVBAMMA0tQTDAeFw0yMDA2MDMwMzA2MDRaFw0zMDA2
    MDEwMzA2MDRaMFMxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
    HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQxDDAKBgNVBAMMA0tQTDB2
    MBAGByqGSM49AgEGBSuBBAAiA2IABDUlvu7GTOB4kYiqBuRqiLU3chqccZhMFL16
    olmMX31M8EWA0VXj5JeMo4js7NcuBRfFp2JIdhqOroodz+Bu64nmhjbr7Qkglk14
    XguoUrwycTAlba2JdBpXRXwY5uP7eqNQME4wHQYDVR0OBBYEFPQ81JjaE8UG4FyX
    Hjo09H9dRkcEMB8GA1UdIwQYMBaAFPQ81JjaE8UG4FyXHjo09H9dRkcEMAwGA1Ud
    EwQFMAMBAf8wCgYIKoZIzj0EAwIDZwAwZAIwSCzsAdwv5fJOlAMI6W+0s5whygR3
    VQEq88EffPmjQ8Cn6rqWFzev4Cd5W18Qput9AjAjoBh5WdlK1N0sIZpRLaCYK7El
    2vab3X1CbV8MkwGJU7Vnjav+w185kSNpbpF6idw=
    -----END CERTIFICATE-----
  server.key: |
    -----BEGIN EC PARAMETERS-----
    BgUrgQQAIg==
    -----END EC PARAMETERS-----
    -----BEGIN EC PRIVATE KEY-----
    MIGkAgEBBDCc8hpwAUrmEZUnFeD4Fi/OnMT2fAXVtJ50FIR/HCWMD/pPDV1uKLZI
    Hm6h6fRQX82gBwYFK4EEACKhZANiAAQ1Jb7uxkzgeJGIqgbkaoi1N3IanHGYTBS9
    eqJZjF99TPBFgNFV4+SXjKOI7OzXLgUXxadiSHYajq6KHc/gbuuJ5oY26+0JIJZN
    eF4LqFK8MnEwJW2tiXQaV0V8GObj+3o=
    -----END EC PRIVATE KEY-----
kind: ConfigMap
metadata:
  name: kpl-ssl
  namespace: kpl
# Source: volcano/templates/scheduler.yaml
# Vendored volcano v0.4.0 manifests; images mirrored to the Ksyun registry.
apiVersion: v1
kind: ConfigMap
metadata:
  name: volcano-scheduler-configmap
  namespace: volcano-system
data:
  # Scheduler configuration: action pipeline plus two plugin tiers.
  volcano-scheduler.conf: |
    actions: "enqueue, allocate, backfill"
    tiers:
    - plugins:
      - name: priority
      - name: gang
      - name: conformance
    - plugins:
      - name: drf
      - name: predicates
      - name: proportion
      - name: nodeorder
      - name: binpack
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: volcano-scheduler
  namespace: volcano-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-scheduler
rules:
- apiGroups: ["apiextensions.k8s.io"]
  resources: ["customresourcedefinitions"]
  verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: ["batch.volcano.sh"]
  resources: ["jobs"]
  verbs: ["get", "list", "watch", "update", "delete"]
- apiGroups: ["batch.volcano.sh"]
  resources: ["jobs/status"]
  verbs: ["update", "patch"]
- apiGroups: [""]
  resources: ["events"]
  verbs: ["create", "list", "watch", "update", "patch"]
- apiGroups: [""]
  resources: ["pods", "pods/status"]
  verbs: ["create", "get", "list", "watch", "update", "bind", "updateStatus", "delete"]
- apiGroups: [""]
  resources: ["pods/binding"]
  verbs: ["create"]
- apiGroups: [""]
  resources: ["persistentvolumeclaims"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["persistentvolumes"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["namespaces"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["resourcequotas"]
  verbs: ["list", "watch"]
- apiGroups: ["storage.k8s.io"]
  resources: ["storageclasses"]
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["list", "watch"]
- apiGroups: ["policy"]
  resources: ["poddisruptionbudgets"]
  verbs: ["list", "watch"]
- apiGroups: ["scheduling.k8s.io"]
  resources: ["priorityclasses"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["scheduling.incubator.k8s.io", "scheduling.volcano.sh"]
  resources: ["queues"]
  verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: ["scheduling.incubator.k8s.io", "scheduling.volcano.sh"]
  resources: ["podgroups"]
  verbs: ["list", "watch", "update"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-scheduler-role
subjects:
- kind: ServiceAccount
  name: volcano-scheduler
  namespace: volcano-system
roleRef:
  kind: ClusterRole
  name: volcano-scheduler
  apiGroup: rbac.authorization.k8s.io
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: volcano-scheduler
  namespace: volcano-system
  labels:
    app: volcano-scheduler
spec:
  replicas: 1
  selector:
    matchLabels:
      app: volcano-scheduler
  template:
    metadata:
      labels:
        app: volcano-scheduler
    spec:
      serviceAccount: volcano-scheduler
      containers:
      - name: volcano-scheduler
        image: hub.kce.ksyun.com/aivc/volcanosh/vc-scheduler:latest
        args:
        - --logtostderr
        - --scheduler-conf=/volcano.scheduler/volcano-scheduler.conf
        - -v=3
        # NOTE(review): "2>&1" is passed as a literal argv element (no shell
        # runs here); kept as vendored but it likely has no effect.
        - 2>&1
        imagePullPolicy: "IfNotPresent"
        volumeMounts:
        - name: scheduler-config
          mountPath: /volcano.scheduler
      volumes:
      - name: scheduler-config
        configMap:
          name: volcano-scheduler-configmap
---
# Source: volcano/templates/admission.yaml
# Vendored volcano v0.4.0 admission webhook manifests.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: volcano-admission
  namespace: volcano-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-admission
rules:
- apiGroups: [""]
  resources: ["configmaps"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["admissionregistration.k8s.io"]
  resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"]
  verbs: ["get", "list", "watch", "create", "update"]
# The rules below are used to generate the admission service secret.
- apiGroups: ["certificates.k8s.io"]
  resources: ["certificatesigningrequests"]
  verbs: ["get", "list", "create", "delete"]
- apiGroups: ["certificates.k8s.io"]
  resources: ["certificatesigningrequests/approval"]
  verbs: ["create", "update"]
- apiGroups: [""]
  resources: ["secrets"]
  verbs: ["create", "get", "patch"]
- apiGroups: ["scheduling.incubator.k8s.io", "scheduling.volcano.sh"]
  resources: ["queues"]
  verbs: ["get", "list"]
- apiGroups: [""]
  resources: ["services"]
  verbs: ["get"]
- apiGroups: ["scheduling.incubator.k8s.io", "scheduling.volcano.sh"]
  resources: ["podgroups"]
  verbs: ["get", "list", "watch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-admission-role
subjects:
- kind: ServiceAccount
  name: volcano-admission
  namespace: volcano-system
roleRef:
  kind: ClusterRole
  name: volcano-admission
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: volcano-admission
  name: volcano-admission
  namespace: volcano-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: volcano-admission
  template:
    metadata:
      labels:
        app: volcano-admission
    spec:
      serviceAccount: volcano-admission
      containers:
      - args:
        - --tls-cert-file=/admission.local.config/certificates/tls.crt
        - --tls-private-key-file=/admission.local.config/certificates/tls.key
        - --ca-cert-file=/admission.local.config/certificates/ca.crt
        - --webhook-namespace=volcano-system
        - --webhook-service-name=volcano-admission-service
        - --logtostderr
        - --port=8443
        - -v=4
        # NOTE(review): literal argv element, not a shell redirection.
        - 2>&1
        image: hub.kce.ksyun.com/aivc/volcanosh/vc-webhook-manager:latest
        imagePullPolicy: IfNotPresent
        name: admission
        volumeMounts:
        - mountPath: /admission.local.config/certificates
          name: admission-certs
          readOnly: true
      volumes:
      - name: admission-certs
        secret:
          # 420 decimal == 0644 octal file mode.
          defaultMode: 420
          # Generated by the volcano-admission-init Job below.
          secretName: volcano-admission-secret
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: volcano-admission
  name: volcano-admission-service
  namespace: volcano-system
spec:
  ports:
  - port: 443
    protocol: TCP
    targetPort: 8443
  selector:
    app: volcano-admission
  sessionAffinity: None
---
# One-shot Job that generates the webhook TLS secret via CSR approval.
apiVersion: batch/v1
kind: Job
metadata:
  name: volcano-admission-init
  namespace: volcano-system
  labels:
    app: volcano-admission-init
spec:
  backoffLimit: 3
  template:
    spec:
      serviceAccountName: volcano-admission
      restartPolicy: Never
      containers:
      - name: main
        image: hub.kce.ksyun.com/aivc/volcanosh/vc-webhook-manager:latest
        imagePullPolicy: IfNotPresent
        command: ["./gen-admission-secret.sh", "--service", "volcano-admission-service", "--namespace",
                  "volcano-system", "--secret", "volcano-admission-secret"]
---
# Source: volcano/templates/controllers.yaml
# Vendored volcano v0.4.0 controller-manager manifests.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: volcano-controllers
  namespace: volcano-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-controllers
rules:
- apiGroups: ["apiextensions.k8s.io"]
  resources: ["customresourcedefinitions"]
  verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: ["batch.volcano.sh"]
  resources: ["jobs"]
  verbs: ["get", "list", "watch", "update", "delete"]
- apiGroups: ["batch.volcano.sh"]
  resources: ["jobs/status", "jobs/finalizers"]
  verbs: ["update", "patch"]
- apiGroups: ["bus.volcano.sh"]
  resources: ["commands"]
  verbs: ["get", "list", "watch", "delete"]
- apiGroups: [""]
  resources: ["events"]
  verbs: ["create", "list", "watch", "update", "patch"]
- apiGroups: [""]
  resources: ["pods"]
  verbs: ["create", "get", "list", "watch", "update", "bind", "delete"]
- apiGroups: [""]
  resources: ["persistentvolumeclaims"]
  verbs: ["get", "list", "watch", "create"]
- apiGroups: [""]
  resources: ["services"]
  verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: [""]
  resources: ["configmaps"]
  verbs: ["get", "list", "watch", "create", "delete", "update"]
- apiGroups: [""]
  resources: ["secrets"]
  verbs: ["get", "list", "watch", "create", "delete", "update"]
- apiGroups: ["scheduling.incubator.k8s.io", "scheduling.volcano.sh"]
  resources: ["podgroups", "queues", "queues/status"]
  verbs: ["get", "list", "watch", "create", "delete", "update"]
- apiGroups: ["scheduling.k8s.io"]
  resources: ["priorityclasses"]
  verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: ["networking.k8s.io"]
  resources: ["networkpolicies"]
  verbs: ["get", "create"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-controllers-role
subjects:
- kind: ServiceAccount
  name: volcano-controllers
  namespace: volcano-system
roleRef:
  kind: ClusterRole
  name: volcano-controllers
  apiGroup: rbac.authorization.k8s.io
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: volcano-controllers
  namespace: volcano-system
  labels:
    app: volcano-controller
spec:
  replicas: 1
  selector:
    matchLabels:
      app: volcano-controller
  template:
    metadata:
      labels:
        app: volcano-controller
    spec:
      serviceAccount: volcano-controllers
      containers:
      - name: volcano-controllers
        image: hub.kce.ksyun.com/aivc/volcanosh/vc-controller-manager:latest
        args:
        - --logtostderr
        - -v=4
        # NOTE(review): literal argv element, not a shell redirection.
        - 2>&1
        imagePullPolicy: "IfNotPresent"
---
# Source: volcano/templates/batch_v1alpha1_job.yaml
# NOTE(review): apiextensions.k8s.io/v1beta1 is deprecated (removed in
# Kubernetes 1.22); kept as vendored for the 1.15+ clusters this targets.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: jobs.batch.volcano.sh
spec:
  group: batch.volcano.sh
  names:
    kind: Job
    plural: jobs
    shortNames:
    - vcjob
    - vj
  scope: Namespaced
  validation:
    openAPIV3Schema:
      type: object
      properties:
        apiVersion:
          description: 'APIVersion defines the versioned schema of this representation
            of an object. Servers should convert recognized schemas to the latest
            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
          type: string
        kind:
          description: 'Kind is a string value representing the REST resource this
            object represents. Servers may infer this from the endpoint the client
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
          type: string
        metadata:
          type: object
        spec:
          description: Specification of the desired behavior of a cron job, including
            the minAvailable
          properties:
            volumes:
              description: The volumes for Job
              items:
                properties:
                  volumeClaim:
                    description: VolumeClaim defines the PVC used by the VolumeMount.
                    type: object
                  mountPath:
                    description: Path within the container at which the volume should be mounted.
                      Must not contain ':'.
                    type: string
                  volumeClaimName:
                    description: The name of the volume claim.
                    type: string
                type: object
                required:
                - mountPath
              type: array
            minAvailable:
              description: The minimal available pods to run for this Job
              format: int32
              type: integer
            policies:
              description: Specifies the default lifecycle of tasks
              items:
                properties:
                  action:
                    description: The action that will be taken to the PodGroup according
                      to Event. One of "Restart", "None". Default to None.
                    type: string
                  event:
                    description: The Event recorded by scheduler; the controller takes
                      actions according to this Event.
                    type: string
                  events:
                    description: The Events recorded by scheduler; the controller takes
                      actions according to this Events.
                    type: array
                    items:
                      type: string
                  timeout:
                    description: Timeout is the grace period for controller to take
                      actions. Default to nil (take action immediately).
                    type: object
                type: object
              type: array
            schedulerName:
              description: SchedulerName is the default value of `tasks.template.spec.schedulerName`.
              type: string
            plugins:
              description: Enabled task plugins when creating job.
              type: object
            tasks:
              description: Tasks specifies the task specification of Job
              items:
                properties:
                  name:
                    description: Name specifies the name of tasks
                    type: string
                  policies:
                    description: Specifies the lifecycle of task
                    items:
                      properties:
                        action:
                          description: The action that will be taken to the PodGroup
                            according to Event. One of "Restart", "None". Default
                            to None.
                          type: string
                        event:
                          description: The Event recorded by scheduler; the controller
                            takes actions according to this Event.
                          type: string
                        events:
                          description: The Events recorded by scheduler; the controller takes
                            actions according to this Events.
                          type: array
                          items:
                            type: string
                        timeout:
                          description: Timeout is the grace period for controller
                            to take actions. Default to nil (take action immediately).
                          type: object
                      type: object
                    type: array
                  replicas:
                    description: Replicas specifies the replicas of this TaskSpec
                      in Job
                    format: int32
                    type: integer
                  template:
                    description: Specifies the pod that will be created for this TaskSpec
                      when executing a Job
                    type: object
                type: object
              type: array
            queue:
              description: The name of the queue on which job should been created
              type: string
            maxRetry:
              description: The limit for retrying submiting job, default is 3
              format: int32
              type: integer
          type: object
        status:
          description: Current status of Job
          properties:
            succeeded:
              description: The number of pods which reached phase Succeeded.
              format: int32
              type: integer
            failed:
              description: The number of pods which reached phase Failed.
              format: int32
              type: integer
            minAvailable:
              description: The minimal available pods to run for this Job
              format: int32
              type: integer
            pending:
              description: The number of pending pods.
              format: int32
              type: integer
            running:
              description: The number of running pods.
              format: int32
              type: integer
            version:
              description: Job's current version
              format: int32
              type: integer
            retryCount:
              description: The number that volcano retried to submit the job.
              format: int32
              type: integer
            controlledResources:
              description: All of the resources that are controlled by this job.
              type: object
              additionalProperties:
                type: string
            state:
              description: Current state of Job.
              properties:
                message:
                  description: Human-readable message indicating details about last
                    transition.
                  type: string
                phase:
                  description: The phase of Job
                  type: string
                reason:
                  description: Unique, one-word, CamelCase reason for the condition's
                    last transition.
                  type: string
                lastTransitionTime:
                  description: The time of last state transition.
                  format: date-time
                  type: string
              type: object
          type: object
  version: v1alpha1
  subresources:
    status: {}
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
---
# Source: volcano/templates/bus_v1alpha1_command.yaml
# CRD for Command (bus.volcano.sh/v1alpha1): a namespaced object carrying an
# action for the volcano controller to apply to a target object.
# NOTE(review): apiextensions.k8s.io/v1beta1 is deprecated upstream (removed
# in Kubernetes 1.22); kept as-is because this bundle targets K8s 1.15+.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: commands.bus.volcano.sh
spec:
  group: bus.volcano.sh
  names:
    kind: Command
    plural: commands
  scope: Namespaced
  validation:
    # OpenAPI v3 schema the apiserver uses to validate Command objects.
    openAPIV3Schema:
      type: object
      properties:
        action:
          # Fix: "will be took" -> "will be taken" (user-facing API description).
          description: Action defines the action that will be taken to the target object.
          type: string
        apiVersion:
          description: 'APIVersion defines the versioned schema of this representation
            of an object. Servers should convert recognized schemas to the latest
            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
          type: string
        kind:
          description: 'Kind is a string value representing the REST resource this
            object represents. Servers may infer this from the endpoint the client
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
          type: string
        message:
          description: Human-readable message indicating details of this command.
          type: string
        metadata:
          type: object
        reason:
          description: Unique, one-word, CamelCase reason for this command.
          type: string
        target:
          description: TargetObject defines the target object of this command.
          type: object
  version: v1alpha1
# Placeholder status stanza (typical of exported manifests); the apiserver
# populates these fields after the CRD is applied.
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
---
# Source: volcano/templates/scheduling_v1beta1_podgroup.yaml
# CRD for PodGroup (scheduling.volcano.sh/v1beta1), the grouping unit the
# volcano scheduler operates on. Short names `pg` / `podgroup-v1beta1` are
# registered for kubectl convenience.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: podgroups.scheduling.volcano.sh
spec:
  group: scheduling.volcano.sh
  names:
    kind: PodGroup
    plural: podgroups
    shortNames:
      - pg
      - podgroup-v1beta1
  scope: Namespaced
  validation:
    # OpenAPI v3 schema validating PodGroup objects.
    openAPIV3Schema:
      properties:
        apiVersion:
          type: string
        kind:
          type: string
        metadata:
          type: object
        spec:
          properties:
            # Presumably the minimum number of member pods required to run
            # the group — confirm against volcano's PodGroupSpec.
            minMember:
              format: int32
              type: integer
            queue:
              type: string
            priorityClassName:
              type: string
          type: object
        status:
          # Pod counters per terminal/active phase.
          properties:
            succeeded:
              format: int32
              type: integer
            failed:
              format: int32
              type: integer
            running:
              format: int32
              type: integer
          type: object
      type: object
  version: v1beta1
---
# Source: volcano/templates/scheduling_v1beta1_queue.yaml
# CRD for Queue (scheduling.volcano.sh/v1beta1). Cluster-scoped (queues are
# shared across namespaces); short names `q` / `queue-v1beta1` are registered
# for kubectl convenience.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: queues.scheduling.volcano.sh
spec:
  group: scheduling.volcano.sh
  names:
    kind: Queue
    plural: queues
    shortNames:
      - q
      - queue-v1beta1
  scope: Cluster
  validation:
    # OpenAPI v3 schema validating Queue objects.
    openAPIV3Schema:
      properties:
        apiVersion:
          type: string
        kind:
          type: string
        metadata:
          type: object
        spec:
          properties:
            weight:
              format: int32
              type: integer
            # Free-form object; presumably resource capability limits for the
            # queue — confirm against volcano's QueueSpec.
            capability:
              type: object
          type: object
        status:
          # Per-state counters plus an overall state string.
          properties:
            state:
              type: string
            unknown:
              format: int32
              type: integer
            pending:
              format: int32
              type: integer
            running:
              format: int32
              type: integer
            inqueue:
              format: int32
              type: integer
          type: object
      type: object
  version: v1beta1
  # Serve /status as a subresource so status writes are separated from spec
  # updates.
  subresources:
    status: {}
......@@ -31,4 +31,7 @@ data:
DCGM_FI_DEV_PCIE_MAX_LINK_GEN, gauge, ningfd pcie max link gen
DCGM_FI_DEV_PCIE_MAX_LINK_WIDTH, gauge, ningfd pcie max link width
DCGM_FI_PROF_PCIE_RX_BYTES, gauge, ningfd pcie read bytes
DCGM_FI_PROF_PCIE_TX_BYTES, gauge, ningfd pcie trans bytes
\ No newline at end of file
DCGM_FI_PROF_PCIE_TX_BYTES, gauge, ningfd pcie trans bytes
DCGM_FI_DEV_FB_FREE, gauge, ningfd memory in MB
DCGM_FI_DEV_FB_TOTAL, gauge, ningfd memory in MB
DCGM_FI_DEV_FB_USED, gauge, ningfd memory in MB
\ No newline at end of file
......@@ -10,12 +10,13 @@ spec:
metadata:
labels:
app: wd-dcgm
monitor-by-prometheus: "true"
spec:
imagePullSecrets:
- name: "harbor-secret"
serviceAccountName: autodl-serviceaccount
nodeSelector:
gpu-type-nvidia: "true"
gpu-device-type-nvidia: "true"
kpl: "true"
containers:
- name: wd-dcgm
......@@ -53,7 +54,7 @@ spec:
volumes:
- name: "pod-gpu-resources"
hostPath:
path: "/var/lib/kubelet/pod-resources"
path: "/data/kubelet/pod-resources"
- name: config
configMap:
name: wd-dcgm-config
......@@ -9,6 +9,7 @@ spec:
metadata:
labels:
app: wd-grafana
monitor-by-prometheus: "true"
spec:
imagePullSecrets:
- name: "harbor-secret"
......
......@@ -10,6 +10,7 @@ spec:
metadata:
labels:
app: wd-exporter
monitor-by-prometheus: "true"
spec:
imagePullSecrets:
- name: "harbor-secret"
......
......@@ -126,116 +126,6 @@ data:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
# Example scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# for all or only some endpoints.
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
# Example relabel to scrape only endpoints that have
# "example.io/should_be_scraped = true" annotation.
# - source_labels: [__meta_kubernetes_service_annotation_example_io_should_be_scraped]
# action: keep
# regex: true
#
# Example relabel to customize metric path based on endpoints
# "example.io/metric_path = <metric path>" annotation.
# - source_labels: [__meta_kubernetes_service_annotation_example_io_metric_path]
# action: replace
# target_label: __metrics_path__
# regex: (.+)
#
# Example relabel to scrape only single, desired port for the service based
# on endpoints "example.io/scrape_port = <port>" annotation.
# - source_labels: [__address__, __meta_kubernetes_service_annotation_example_io_scrape_port]
# action: replace
# regex: ([^:]+)(?::\d+)?;(\d+)
# replacement: $1:$2
# target_label: __address__
#
# Example relabel to configure scrape scheme for all service scrape targets
# based on endpoints "example.io/scrape_scheme = <scheme>" annotation.
# - source_labels: [__meta_kubernetes_service_annotation_example_io_scrape_scheme]
# action: replace
# target_label: __scheme__
# regex: (https?)
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
# Example scrape config for probing services via the Blackbox Exporter.
#
# The relabeling allows the actual service scrape endpoint to be configured
# for all or only some services.
- job_name: 'kubernetes-services'
metrics_path: /probe
params:
module: [http_2xx]
kubernetes_sd_configs:
- role: service
relabel_configs:
# Example relabel to probe only some services that have "example.io/should_be_probed = true" annotation
# - source_labels: [__meta_kubernetes_service_annotation_example_io_should_be_probed]
# action: keep
# regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter.example.com:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
# Example scrape config for probing ingresses via the Blackbox Exporter.
#
# The relabeling allows the actual ingress scrape endpoint to be configured
# for all or only some services.
- job_name: 'kubernetes-ingresses'
metrics_path: /probe
params:
module: [http_2xx]
kubernetes_sd_configs:
- role: ingress
relabel_configs:
# Example relabel to probe only some ingresses that have "example.io/should_be_probed = true" annotation
# - source_labels: [__meta_kubernetes_ingress_annotation_example_io_should_be_probed]
# action: keep
# regex: true
- source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
regex: (.+);(.+);(.+)
replacement: ${1}://${2}${3}
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter.example.com:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_ingress_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_ingress_name]
target_label: kubernetes_name
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape to be configured
......@@ -245,6 +135,9 @@ data:
kubernetes_sd_configs:
- role: pod
selectors:
- role: pod
label: "monitor-by-prometheus=true"
relabel_configs:
# Example relabel to scrape only pods that have
......@@ -267,6 +160,9 @@ data:
# regex: ([^:]+)(?::\d+)?;(\d+)
# replacement: $1:$2
# target_label: __address__
#- source_labels: [__meta_kubernetes_pod_label_app]
# action: keep
# regex: (^wd*).*
- action: replace
regex: (.*)
replacement: $1
......
......@@ -9,6 +9,7 @@ spec:
metadata:
labels:
app: wd-prometheus
monitor-by-prometheus: "true"
spec:
imagePullSecrets:
- name: "harbor-secret"
......
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!