| Rule | State | Error | Last Evaluation | Evaluation Time |
| record: instance:unicorn_utilization:ratio
expr: sum
by(instance) (unicorn_active_connections) / count by(instance) (ruby_memory_bytes)
|
ok
|
|
44.531s ago
|
326.8us |
| record: job_grpc:grpc_server_handled_total:rate5m
expr: sum
by(job, grpc_code, grpc_method, grpc_service, grpc_type) (rate(grpc_server_handled_total[5m]))
|
ok
|
|
44.53s ago
|
40.13ms |
| record: job_route_method_code:gitlab_workhorse_http_request_duration_seconds_count:rate5m
expr: sum
by(job, route, method, code) (rate(gitlab_workhorse_http_request_duration_seconds_count[5m]))
|
ok
|
|
44.49s ago
|
538.9us |
| alert: ServiceDown
expr: avg_over_time(up[5m])
* 100 < 50
annotations:
description: The service {{ $labels.job }} instance {{ $labels.instance }} is not
responding for more than 50% of the time for 5 minutes.
summary: The service {{ $labels.job }} is not responding
|
ok
|
|
44.49s ago
|
222.3us |
| alert: RedisDown
expr: avg_over_time(redis_up[5m])
* 100 < 50
annotations:
description: The Redis service {{ $labels.job }} instance {{ $labels.instance }}
is not responding for more than 50% of the time for 5 minutes.
summary: The Redis service {{ $labels.job }} is not responding
|
ok
|
|
44.49s ago
|
81.51us |
| alert: PostgresDown
expr: avg_over_time(pg_up[5m])
* 100 < 50
annotations:
description: The Postgres service {{ $labels.job }} instance {{ $labels.instance
}} is not responding for more than 50% of the time for 5 minutes.
summary: The Postgres service {{ $labels.job }} is not responding
|
ok
|
|
44.49s ago
|
63.8us |
| alert: UnicornQueueing
expr: avg_over_time(unicorn_queued_connections[30m])
> 1
annotations:
description: Unicorn instance {{ $labels.instance }} is queueing requests with an
average of {{ $value | printf "%.1f" }} over the last 30 minutes.
summary: Unicorn is queueing requests
|
ok
|
|
44.49s ago
|
161.7us |
| alert: HighUnicornUtilization
expr: instance:unicorn_utilization:ratio
* 100 > 90
for: 1h
annotations:
description: Unicorn instance {{ $labels.instance }} has more than 90% worker utilization
({{ $value | printf "%.1f" }}%) over the last 60 minutes.
summary: Unicorn has high utilization
|
ok
|
|
44.49s ago
|
67.05us |
| alert: SidekiqJobsQueuing
expr: sum
by(name) (sidekiq_queue_size) > 0
for: 1h
annotations:
description: Sidekiq queue {{ $labels.name }} has {{ $value }} jobs queued for 60
minutes.
summary: Sidekiq has jobs queued
|
ok
|
|
44.49s ago
|
1.317ms |
| alert: HighgRPCResourceExhaustedRate
expr: sum
without(grpc_code) (job_grpc:grpc_server_handled_total:rate5m{grpc_code="ResourceExhausted"})
/ sum without(grpc_code) (job_grpc:grpc_server_handled_total:rate5m) * 100 >
1
for: 1h
annotations:
description: gRPC is returning more than 1% ({{ $value | printf "%.1f" }}%)
ResourceExhausted errors over the last 60 minutes.
summary: High gRPC ResourceExhausted error rate
|
ok
|
|
44.489s ago
|
14.73ms |
| alert: PostgresDatabaseDeadlocks
expr: increase(pg_stat_database_deadlocks[5m])
> 0
annotations:
description: Postgres database {{ $labels.instance }} had {{ $value | printf "%d"
}} deadlocks in the last 5 minutes.
summary: Postgres database has deadlocks
|
ok
|
|
44.474s ago
|
147.9us |
| alert: PostgresDatabaseDeadlockCancels
expr: increase(pg_stat_database_conflicts_confl_deadlock[5m])
> 0
annotations:
description: Postgres database {{ $labels.instance }} had {{ $value | printf "%d"
}} queries canceled due to deadlocks in the last 5 minutes.
summary: Postgres database has queries canceled due to deadlocks
|
ok
|
|
44.474s ago
|
98.1us |
| alert: WorkhorseHighErrorRate
expr: (sum
without(job, code) (job_route_method_code:gitlab_workhorse_http_request_duration_seconds_count:rate5m{code=~"5.."})
/ sum without(job, code) (job_route_method_code:gitlab_workhorse_http_request_duration_seconds_count:rate5m)
< 10) * 100 > 50
annotations:
description: Workhorse route {{ $labels.route }} method {{ $labels.method }} has
more than 50% errors ({{ $value | printf "%.1f" }}%) for the last 60 minutes.
summary: Workhorse has high error rates
|
ok
|
|
44.474s ago
|
286.4us |
| alert: WorkhorseHighErrorRate
expr: (sum
without(job, code) (job_route_method_code:gitlab_workhorse_http_request_duration_seconds_count:rate5m{code=~"5.."})
/ sum without(job, code) (job_route_method_code:gitlab_workhorse_http_request_duration_seconds_count:rate5m)
> 10) * 100 > 10
annotations:
description: Workhorse route {{ $labels.route }} method {{ $labels.method }} has
more than 10% errors ({{ $value | printf "%.1f" }}%) for the last 60 minutes.
summary: Workhorse has high error rates
|
ok
|
|
44.474s ago
|
257.5us |
|
36.049s ago |
4.372ms |