cloudwatch-alarm-creator
27
总安装量
3
周安装量
#13743
全站排名
安装命令
npx skills add https://github.com/dengineproblem/agents-monorepo --skill cloudwatch-alarm-creator
Agent 安装分布
github-copilot
3
amp
2
claude-code
2
kimi-cli
2
gemini-cli
2
Skill 文档
CloudWatch Alarm Creator
Эксперт по мониторингу AWS CloudWatch и настройке алармов.
Основные принципы
- Выбор порогов: Основывайте на исторических данных и бизнес-требованиях
- Статистические методы: Выбирайте подходящую статистику (Average, Sum, Maximum) по характеристикам метрик
- Периоды оценки: Баланс между отзывчивостью и подавлением шума
- Actionable алерты: Каждый аларм должен иметь понятный путь устранения
- Оптимизация стоимости: Эффективные стратегии для минимизации расходов
EC2 Alarm
{
  "AlarmName": "HighCPUUtilization",
  "Namespace": "AWS/EC2",
  "MetricName": "CPUUtilization",
  "Dimensions": [
    { "Name": "InstanceId", "Value": "i-1234567890abcdef0" }
  ],
  "Statistic": "Average",
  "Period": 300,
  "EvaluationPeriods": 2,
  "ComparisonOperator": "GreaterThanThreshold",
  "Threshold": 80,
  "TreatMissingData": "notBreaching",
  "AlarmActions": ["arn:aws:sns:region:account:topic"]
}
ALB Alarm
{
  "AlarmName": "HighTargetResponseTime",
  "Namespace": "AWS/ApplicationELB",
  "MetricName": "TargetResponseTime",
  "Dimensions": [
    { "Name": "LoadBalancer", "Value": "app/my-alb/1234567890" }
  ],
  "Statistic": "Average",
  "Period": 60,
  "EvaluationPeriods": 3,
  "DatapointsToAlarm": 2,
  "ComparisonOperator": "GreaterThanThreshold",
  "Threshold": 1.0,
  "TreatMissingData": "ignore"
}
RDS Alarm
{
  "AlarmName": "HighDatabaseConnections",
  "Namespace": "AWS/RDS",
  "MetricName": "DatabaseConnections",
  "Dimensions": [
    { "Name": "DBInstanceIdentifier", "Value": "my-database" }
  ],
  "Statistic": "Average",
  "Period": 300,
  "EvaluationPeriods": 2,
  "ComparisonOperator": "GreaterThanThreshold",
  "Threshold": 100
}
Terraform Configuration
# Alarm on the average CPU utilization of a single EC2 instance.
resource "aws_cloudwatch_metric_alarm" "ec2_cpu_high" {
  alarm_name        = "ec2-cpu-high-${var.instance_id}"
  alarm_description = "CPU utilization exceeds 80%"

  # Metric being watched.
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  statistic   = "Average"
  period      = 300

  dimensions = {
    InstanceId = var.instance_id
  }

  # Condition: above the threshold for 2 evaluation periods of 300 seconds each.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 80
  evaluation_periods  = 2

  # The same SNS topic is used for both the ALARM and the OK transition.
  alarm_actions = [aws_sns_topic.alerts.arn]
  ok_actions    = [aws_sns_topic.alerts.arn]

  tags = {
    Environment = var.environment
    ManagedBy   = "terraform"
  }
}
# Metric-math alarm on an error rate derived from two custom metrics.
resource "aws_cloudwatch_metric_alarm" "custom_metric" {
  alarm_name          = "custom-error-rate"
  alarm_description   = "Error rate exceeds 5%"
  comparison_operator = "GreaterThanThreshold"
  threshold           = 5
  evaluation_periods  = 3

  # The only query with return_data = true; it combines the two
  # input queries below, referenced by their ids.
  metric_query {
    id          = "error_rate"
    label       = "Error Rate"
    expression  = "errors/requests*100"
    return_data = true
  }

  # Input: sum of Errors per 60-second period.
  metric_query {
    id = "errors"

    metric {
      namespace   = "MyApp"
      metric_name = "Errors"
      stat        = "Sum"
      period      = 60
    }
  }

  # Input: sum of Requests per 60-second period.
  metric_query {
    id = "requests"

    metric {
      namespace   = "MyApp"
      metric_name = "Requests"
      stat        = "Sum"
      period      = 60
    }
  }
}
Composite Alarm
{
  "AlarmName": "CompositeSystemHealth",
  "AlarmDescription": "System health degraded - multiple metrics breaching",
  "AlarmRule": "ALARM(HighCPU) AND (ALARM(HighMemory) OR ALARM(HighDisk))",
  "AlarmActions": ["arn:aws:sns:region:account:critical-alerts"]
}
Anomaly Detection
{
  "AlarmName": "AnomalyDetectionCPU",
  "ComparisonOperator": "GreaterThanUpperThreshold",
  "EvaluationPeriods": 2,
  "ThresholdMetricId": "ad1",
  "Metrics": [
    {
      "Id": "m1",
      "ReturnData": true,
      "MetricStat": {
        "Metric": {
          "Namespace": "AWS/EC2",
          "MetricName": "CPUUtilization",
          "Dimensions": [{ "Name": "InstanceId", "Value": "i-123" }]
        },
        "Period": 300,
        "Stat": "Average"
      }
    },
    {
      "Id": "ad1",
      "Expression": "ANOMALY_DETECTION_BAND(m1, 2)",
      "ReturnData": true
    }
  ]
}
SNS Integration
# SNS topic that the subscriptions below attach to.
resource "aws_sns_topic" "alerts" {
  name = "cloudwatch-alerts"
}
# Email delivery of alarm notifications to the operations mailbox.
# NOTE(review): SNS email subscriptions normally stay pending until the
# recipient confirms them - verify after apply.
resource "aws_sns_topic_subscription" "email" {
  protocol  = "email"
  endpoint  = "ops-team@example.com"
  topic_arn = aws_sns_topic.alerts.arn
}
# Deliver alarm notifications to the alert-handler Lambda function.
resource "aws_sns_topic_subscription" "lambda" {
  topic_arn = aws_sns_topic.alerts.arn
  protocol  = "lambda"
  endpoint  = aws_lambda_function.alert_handler.arn
}

# The subscription alone does not authorize SNS to invoke the function;
# without this resource-based permission, deliveries to the Lambda fail.
resource "aws_lambda_permission" "allow_sns_alerts" {
  statement_id  = "AllowExecutionFromSNS"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.alert_handler.function_name
  principal     = "sns.amazonaws.com"
  source_arn    = aws_sns_topic.alerts.arn
}
TreatMissingData Options
| Значение | Описание | Использование |
|---|---|---|
| notBreaching | Missing = OK | Стандартные метрики |
| breaching | Missing = ALARM | Heartbeat мониторинг |
| ignore | Сохранять текущее | ALB метрики |
| missing | Missing = INSUFFICIENT | По умолчанию |
Рекомендации по порогам
# Recommended starting thresholds, grouped by service and then by metric.
EC2:
  CPUUtilization:
    warning: 70%
    critical: 85%
    period: 300s
  StatusCheckFailed:
    threshold: 1
    period: 60s
ALB:
  # CloudWatch reports TargetResponseTime in seconds, so express these
  # as 0.5 and 1.0 when setting the alarm Threshold.
  TargetResponseTime:
    p95_warning: 500ms
    p99_critical: 1000ms
  # The CloudWatch metric name carries the _Count suffix.
  HTTPCode_ELB_5XX_Count:
    threshold: 10
    period: 60s
RDS:
  CPUUtilization:
    warning: 70%
    critical: 85%
  FreeableMemory:
    critical: 256MB
  DiskQueueDepth:
    warning: 5
    critical: 10
Оптимизация стоимости
- Консолидируйте алармы через composite alarms
- Используйте более длинные периоды где возможно
- Удаляйте неиспользуемые алармы регулярно
- Группируйте ресурсы через теги
Тестирование алармов
# Manually switch the alarm state to ALARM to test its notifications.
aws cloudwatch set-alarm-state \
  --alarm-name "HighCPUUtilization" \
  --state-reason "Testing notifications" \
  --state-value ALARM
Лучшие практики
- 2 из 3 datapoints — фильтрация временных спайков
- Percentile-based thresholds — для latency метрик (P95, P99)
- Multi-level alerts — Warning и Critical уровни
- Документируйте runbooks — для каждого типа аларма
- Регулярный аудит — пересматривайте эффективность порогов