Skip to content

Commit

Permalink
Merge pull request #1441 from DataDog/leo/php_check
Browse files Browse the repository at this point in the history
[php-fpm] new check, get perf insight of your php-fpm install
  • Loading branch information
degemer committed Mar 18, 2015
2 parents 8fb5a06 + a23c7fb commit 03d7716
Show file tree
Hide file tree
Showing 9 changed files with 367 additions and 8 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ env:
- TRAVIS_FLAVOR=etcd
- TRAVIS_FLAVOR=pgbouncer
- TRAVIS_FLAVOR=supervisord
- TRAVIS_FLAVOR=phpfpm

# Override travis defaults with empty jobs
before_install: echo "OVERRIDING TRAVIS STEPS"
Expand Down
1 change: 1 addition & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ require './ci/mongo'
require './ci/mysql'
require './ci/nginx'
require './ci/pgbouncer'
require './ci/phpfpm'
require './ci/postgres'
require './ci/rabbitmq'
require './ci/redis'
Expand Down
125 changes: 125 additions & 0 deletions checks.d/php_fpm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# 3p
import requests

# project
from checks import AgentCheck
from util import headers


class PHPFPMCheck(AgentCheck):
"""
Tracks basic php-fpm metrics via the status module
Requires php-fpm pools to have the status option.
See http://www.php.net/manual/de/install.fpm.configuration.php#pm.status-path for more details
"""

SERVICE_CHECK_NAME = 'php_fpm.can_ping'

GAUGES = {
'listen queue': 'php_fpm.listen_queue.size',
'idle processes': 'php_fpm.processes.idle',
'active processes': 'php_fpm.processes.active',
'total processes': 'php_fpm.processes.total',
}

RATES = {
'max children reached': 'php_fpm.processes.max_reached'
}

COUNTERS = {
'accepted conn': 'php_fpm.requests.accepted',
'slow requests': 'php_fpm.requests.slow'
}

def check(self, instance):
status_url = instance.get('status_url')
ping_url = instance.get('ping_url')

auth = None
user = instance.get('user')
password = instance.get('password')

tags = instance.get('tags', [])

if user and password:
auth = (user, password)

if status_url is None and ping_url is None:
raise Exception("No status_url or ping_url specified for this instance")

pool = None
status_exception = None
if status_url is not None:
try:
pool = self._process_status(status_url, auth, tags)
except Exception as e:
status_exception = e
pass

if ping_url is not None:
self._process_ping(ping_url, auth, tags, pool)

if status_exception is not None:
raise status_exception

def _process_status(self, status_url, auth, tags):
data = {}
try:
# TODO: adding the 'full' parameter gets you per-process detailed
# informations, which could be nice to parse and output as metrics
resp = requests.get(status_url, auth=auth,
headers=headers(self.agentConfig),
params={'json': True})
resp.raise_for_status()

data = resp.json()
except Exception as e:
self.log.error("Failed to get metrics from {0}.\nError {1}".format(status_url, e))
raise

pool_name = data.get('pool', 'default')
metric_tags = tags + ["pool:{0}".format(pool_name)]

for key, mname in self.GAUGES.iteritems():
if key not in data:
self.log.warn("Gauge metric {0} is missing from FPM status".format(key))
continue
self.gauge(mname, int(data[key]), tags=metric_tags)

for key, mname in self.RATES.iteritems():
if key not in data:
self.log.warn("Rate metric {0} is missing from FPM status".format(key))
continue
self.rate(mname, int(data[key]), tags=metric_tags)

for key, mname in self.COUNTERS.iteritems():
if key not in data:
self.log.warn("Counter metric {0} is missing from FPM status".format(key))
continue
self.increment(mname, int(data[key]), tags=metric_tags)

# return pool, to tag the service check with it if we have one
return pool_name

def _process_ping(self, ping_url, auth, tags, pool_name):
sc_tags = tags[:]
if pool_name is not None:
sc_tags.append("pool:{0}".format(pool_name))

try:
# TODO: adding the 'full' parameter gets you per-process detailed
# informations, which could be nice to parse and output as metrics
resp = requests.get(ping_url, auth=auth,
headers=headers(self.agentConfig))
resp.raise_for_status()

if 'pong' not in resp.text:
raise Exception("Received unexpected reply to ping {0}".format(resp.text))

except Exception as e:
self.log.error("Failed to ping FPM pool {0} on URL {1}."
"\nError {2}".format(pool_name, ping_url, e))
self.service_check(self.SERVICE_CHECK_NAME,
AgentCheck.CRITICAL, tags=sc_tags, message=str(e))
else:
self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=sc_tags)
85 changes: 85 additions & 0 deletions ci/phpfpm.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
require './ci/common'

def nginx_version
ENV['NGINX_VERSION'] || '1.7.9'
end

def php_version
ENV['PHP_VERSION'] || '5.6.6'
end

def phpfpm_rootdir
"#{ENV['INTEGRATIONS_DIR']}/phpfpm_#{php_version}"
end

namespace :ci do
namespace :phpfpm do |flavor|
task :before_install => ['ci:common:before_install']

task :install => ['ci:common:install'] do
unless Dir.exist? File.expand_path(phpfpm_rootdir)
sh %(curl -s -L\
-o $VOLATILE_DIR/nginx-#{nginx_version}.tar.gz\
http://nginx.org/download/nginx-#{nginx_version}.tar.gz)
sh %(mkdir -p #{phpfpm_rootdir})
sh %(mkdir -p $VOLATILE_DIR/nginx)
sh %(tar zxf $VOLATILE_DIR/nginx-#{nginx_version}.tar.gz\
-C $VOLATILE_DIR/nginx --strip-components=1)
sh %(cd $VOLATILE_DIR/nginx\
&& ./configure --prefix=#{phpfpm_rootdir} --with-http_stub_status_module\
&& make -j $CONCURRENCY\
&& make install)
sh %(curl -s -L\
-o $VOLATILE_DIR/php-#{php_version}.tar.bz2\
http://us1.php.net/get/php-#{php_version}.tar.bz2/from/this/mirror)
sh %(mkdir -p $VOLATILE_DIR/php)
sh %(tar jxf $VOLATILE_DIR/php-#{php_version}.tar.bz2\
-C $VOLATILE_DIR/php --strip-components=1)
sh %(cd $VOLATILE_DIR/php\
&& ./configure --prefix=#{phpfpm_rootdir} --enable-fpm\
&& make -j $CONCURRENCY\
&& make install)
end
end

task :before_script => ['ci:common:before_script'] do
sh %(cp $TRAVIS_BUILD_DIR/ci/resources/phpfpm/nginx.conf\
#{phpfpm_rootdir}/conf/nginx.conf)
sh %(cp $TRAVIS_BUILD_DIR/ci/resources/phpfpm/php-fpm.conf\
#{phpfpm_rootdir}/etc/php-fpm.conf)
sh %(#{phpfpm_rootdir}/sbin/nginx -g "pid #{ENV['VOLATILE_DIR']}/nginx.pid;")
sh %(#{phpfpm_rootdir}/sbin/php-fpm -g #{ENV['VOLATILE_DIR']}/php-fpm.pid)
end

task :script => ['ci:common:script'] do
this_provides = [
'phpfpm'
]
Rake::Task['ci:common:run_tests'].invoke(this_provides)
end

task :cleanup => ['ci:common:cleanup'] do
sh %(kill `cat $VOLATILE_DIR/nginx.pid`)
sh %(kill `cat $VOLATILE_DIR/php-fpm.pid`)
end

task :execute do
exception = nil
begin
%w(before_install install before_script script).each do |t|
Rake::Task["#{flavor.scope.path}:#{t}"].invoke
end
rescue => e
exception = e
puts "Failed task: #{e.class} #{e.message}".red
end
if ENV['SKIP_CLEANUP']
puts 'Skipping cleanup, disposable environments are great'.yellow
else
puts 'Cleaning up'
Rake::Task["#{flavor.scope.path}:cleanup"].invoke
end
fail exception if exception
end
end
end
21 changes: 21 additions & 0 deletions ci/resources/phpfpm/nginx.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
worker_processes 1;
events {
worker_connections 1024;
}
http {
include mime.types;
default_type application/octet-stream;
sendfile on;
keepalive_timeout 65;
server {
listen 42424;
server_name localhost;
location ~ /(status|ping|\.*\.php)$ {
root html;
fastcgi_pass 127.0.0.1:9000;
fastcgi_index index.php;
fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
include fastcgi_params;
}
}
}
11 changes: 11 additions & 0 deletions ci/resources/phpfpm/php-fpm.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[www]
user = nobody
group = nobody
listen = 127.0.0.1:9000
pm = dynamic
pm.max_children = 5
pm.start_servers = 2
pm.min_spare_servers = 1
pm.max_spare_servers = 3
pm.status_path = /status
ping.path = /ping
24 changes: 24 additions & 0 deletions conf.d/php_fpm.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
init_config:

instances:
- # Get metrics from your FPM pool with this URL
status_url: http://localhost/status
# Get a reliable service check of you FPM pool with that one
ping_url: http://localhost/ping
# These 2 URLs should follow the options from your FPM pool
# See http://php.net/manual/en/install.fpm.configuration.php
# * pm.status_path
# * ping.path
# You should configure your fastcgi passthru (nginx/apache) to
# catch these URLs and redirect them through the FPM pool target
# you want to monitor (FPM `listen` directive in the config, usually
# a UNIX socket or TCP socket.
#
# Use this if you have basic authentication on these pages
# user: bits
# password: D4T4D0G
#
# Array of custom tags
# By default metrics and service check will be tagged by pool and host
# tags:
# - instance:foo
17 changes: 9 additions & 8 deletions tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,10 @@ def coverage_report(self):
tested_metrics += 1
else:
untested_metrics.append(m)
coverage_metrics=100.0 * tested_metrics / total_metrics
if total_metrics == 0:
coverage_metrics = 100.0
else:
coverage_metrics = 100.0 * tested_metrics / total_metrics

total_sc = len(self.service_checks)
tested_sc = 0
Expand All @@ -190,7 +193,11 @@ def coverage_report(self):
tested_sc += 1
else:
untested_sc.append(sc)
coverage_sc=100.0 * tested_sc / total_sc

if total_sc == 0:
coverage_sc = 100.0
else:
coverage_sc = 100.0 * tested_sc / total_sc

coverage = """Coverage
========================================
Expand Down Expand Up @@ -240,8 +247,6 @@ def assertMetric(self, metric_name, value=None, tags=None, count=None, at_least=
if count is not None:
log.debug(" * should have exactly {0} data points".format(count))
if at_least is not None:
if count is not None:
log.warn("Tolerance param will be ignored b/c count is present")
log.debug(" * should have at least {0} data points".format(at_least))

candidates = []
Expand All @@ -266,8 +271,6 @@ def assertMetricTagPrefix(self, metric_name, tag_prefix, count=None, at_least=1)
if count is not None:
log.debug(" * should have exactly {0} data points".format(count))
if at_least is not None:
if count is not None:
log.warn("Tolerance param will be ignored b/c count is present")
log.debug(" * should have at least {0} data points".format(at_least))

candidates = []
Expand All @@ -290,8 +293,6 @@ def assertMetricTag(self, metric_name, tag, count=None, at_least=1):
if count is not None:
log.debug(" * should have exactly {0} data points".format(count))
if at_least is not None:
if count is not None:
log.warn("Tolerance param will be ignored b/c count is present")
log.debug(" * should have at least {0} data points".format(at_least))

candidates = []
Expand Down
Loading

0 comments on commit 03d7716

Please sign in to comment.