Add log JSON formatter

Antoine Legrand 2017-02-02 00:17:25 +01:00 committed by Antoine Legrand
parent 4750d1c5ef
commit 3c99928a27
27 changed files with 402 additions and 41 deletions

app.py

@ -1,3 +1,4 @@
import hashlib
import json
import logging
import os
@ -13,7 +14,7 @@ from jwkest.jwk import RSAKey
from werkzeug.routing import BaseConverter
import features
from auth.auth_context import get_authenticated_user
from avatars.avatars import Avatar
from buildman.manager.buildcanceller import BuildCanceller
from data import database
@ -31,6 +32,7 @@ from oauth.services.github import GithubOAuthService
from oauth.services.gitlab import GitLabOAuthService
from oauth.loginmanager import OAuthLoginManager
from storage import Storage
from util.log import filter_logs
from util import get_app_url
from util.saas.analytics import Analytics
from util.saas.useranalytics import UserAnalytics
@ -49,6 +51,7 @@ from util.tufmetadata.api import TUFMetadataAPI
from util.security.instancekeys import InstanceKeys
from util.security.signing import Signer
OVERRIDE_CONFIG_DIRECTORY = 'conf/stack/'
OVERRIDE_CONFIG_YAML_FILENAME = 'conf/stack/config.yaml'
OVERRIDE_CONFIG_PY_FILENAME = 'conf/stack/config.py'
@ -102,6 +105,10 @@ if (app.config['PREFERRED_URL_SCHEME'] == 'https' and
# Load features from config.
features.import_features(app.config)
CONFIG_DIGEST = hashlib.sha256(json.dumps(app.config, default=str)).hexdigest()[0:8]
logger.debug("Loaded config", extra={"config": app.config})
class RequestWithId(Request):
request_gen = staticmethod(urn_generator(['request']))
@ -114,26 +121,60 @@ class RequestWithId(Request):
@app.before_request
def _request_start():
logger.debug('Starting request: %s', request.path)
logger.info("request-start", extra={"request_id": request.request_id})
DEFAULT_FILTER = lambda x: '[FILTERED]'
FILTERED_VALUES = [
{'key': ['password'], 'fn': DEFAULT_FILTER},
{'key': ['user', 'password'], 'fn': DEFAULT_FILTER},
{'key': ['blob'], 'fn': lambda x: x[0:8]}
]
@app.after_request
def _request_end(r):
def _request_end(resp):
jsonbody = request.get_json(force=True, silent=True)
values = request.values.to_dict()
if jsonbody and not isinstance(jsonbody, dict):
jsonbody = {'_parsererror': jsonbody}
if isinstance(values, dict):
filter_logs(values, FILTERED_VALUES)
extra = {
"endpoint": request.endpoint,
"request_id" : request.request_id,
"remote_addr": request.remote_addr,
"http_method": request.method,
"original_url": request.url,
"path": request.path,
"parameters": values,
"json_body": jsonbody,
"confsha": CONFIG_DIGEST,
}
if request.user_agent is not None:
extra["user-agent"] = request.user_agent.string
user = get_authenticated_user()
if user:
extra['user'] = {'email': user.email,
'uuid': user.uuid,
'org': user.organization,
'robot': user.robot}
logger.info("request-end", extra=extra)
logger.debug('Ending request: %s', request.path)
return r
return resp
class InjectingFilter(logging.Filter):
def filter(self, record):
if _request_ctx_stack.top is not None:
record.msg = '[%s] %s' % (request.request_id, record.msg)
return True
root_logger = logging.getLogger()
# Add the request id filter to all handlers of the root logger
for handler in root_logger.handlers:
handler.addFilter(InjectingFilter())
app.request_class = RequestWithId
# Register custom converters.

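To illustrate the redaction wired into _request_end above, here is a minimal sketch (not part of the commit) of how the FILTERED_VALUES rules and the filter_logs helper from util/log.py scrub request parameters before they are attached to the log record; the sample values are purely illustrative:

from util.log import filter_logs

# Hypothetical request parameters; filter_logs mutates the dict in place
# according to the FILTERED_VALUES rules defined above.
values = {'user': {'password': 'hunter2'},
          'blob': 'sha256:0123456789abcdef',
          'tag': 'latest'}
filter_logs(values, FILTERED_VALUES)
# values is now:
# {'user': {'password': '[FILTERED]'}, 'blob': 'sha256:0', 'tag': 'latest'}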

@ -1,6 +1,8 @@
import os
import logging
import logging.config
from util.log import logfile_path
from app import app as application
@ -12,5 +14,5 @@ import secscan
if __name__ == '__main__':
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
application.run(port=5000, debug=True, threaded=True, host='0.0.0.0')


@ -1,10 +1,12 @@
from Crypto import Random
from util.log import logfile_path
logconfig = logfile_path(debug=True)
bind = '0.0.0.0:5000'
workers = 2
worker_class = 'gevent'
daemon = False
logconfig = 'conf/logging_debug.conf'
pythonpath = '.'
preload_app = True


@ -1,9 +1,11 @@
from Crypto import Random
from util.log import logfile_path
logconfig = logfile_path(debug=False)
bind = 'unix:/tmp/gunicorn_registry.sock'
workers = 8
worker_class = 'gevent'
logconfig = 'conf/logging.conf'
pythonpath = '.'
preload_app = True


@ -1,9 +1,11 @@
from Crypto import Random
from util.log import logfile_path
logconfig = logfile_path(debug=False)
bind = 'unix:/tmp/gunicorn_secscan.sock'
workers = 2
worker_class = 'gevent'
logconfig = 'conf/logging.conf'
pythonpath = '.'
preload_app = True


@ -1,8 +1,11 @@
from Crypto import Random
from util.log import logfile_path
logconfig = logfile_path(debug=False)
bind = 'unix:/tmp/gunicorn_verbs.sock'
workers = 4
logconfig = 'conf/logging.conf'
pythonpath = '.'
preload_app = True
timeout = 2000 # Because sync workers


@ -1,9 +1,12 @@
from Crypto import Random
from util.log import logfile_path
logconfig = logfile_path(debug=False)
bind = 'unix:/tmp/gunicorn_web.sock'
workers = 2
worker_class = 'gevent'
logconfig = 'conf/logging.conf'
pythonpath = '.'
preload_app = True


@ -1,11 +1,11 @@
[loggers]
keys=root
keys=root,gunicorn.error,gunicorn.access
[handlers]
keys=console
[formatters]
keys=generic
keys=generic,json
[logger_root]
level=INFO
@ -19,3 +19,18 @@ args=(sys.stdout, )
[formatter_generic]
format=%(asctime)s [%(process)d] [%(levelname)s] [%(name)s] %(message)s
class=logging.Formatter
[formatter_json]
class=loghandler.JsonFormatter
[logger_gunicorn.error]
level=ERROR
handlers=console
propagate=0
qualname=gunicorn.error
[logger_gunicorn.access]
handlers=console
propagate=0
qualname=gunicorn.access
level=DEBUG


@ -1,11 +1,11 @@
[loggers]
keys=root,boto
keys=root,boto,gunicorn.error,gunicorn.access
[handlers]
keys=console
[formatters]
keys=generic
keys=generic,json
[logger_root]
level=DEBUG
@ -16,11 +16,26 @@ level=INFO
handlers=console
qualname=boto
[logger_gunicorn.access]
handlers=console
propagate=0
qualname=gunicorn.access
level=DEBUG
[handler_console]
class=StreamHandler
formatter=generic
args=(sys.stdout, )
[logger_gunicorn.error]
level=ERROR
handlers=console
propagate=0
qualname=gunicorn.error
[formatter_generic]
format=%(asctime)s [%(process)d] [%(levelname)s] [%(name)s] %(message)s
class=logging.Formatter
[formatter_json]
class=loghandler.JsonFormatter


@ -0,0 +1,41 @@
[loggers]
keys=root,boto,gunicorn.error,gunicorn.access
[handlers]
keys=console
[formatters]
keys=generic,json
[logger_root]
level=DEBUG
handlers=console
[logger_boto]
level=INFO
handlers=console
qualname=boto
[logger_gunicorn.access]
handlers=console
propagate=0
qualname=gunicorn.access
level=DEBUG
[handler_console]
class=StreamHandler
formatter=json
args=(sys.stdout, )
[logger_gunicorn.error]
level=ERROR
handlers=console
propagate=0
qualname=gunicorn.error
[formatter_generic]
format=%(asctime)s [%(process)d] [%(levelname)s] [%(name)s] %(message)s
class=logging.Formatter
[formatter_json]
class=loghandler.JsonFormatter

conf/logging_json.conf Normal file

@ -0,0 +1,36 @@
[loggers]
keys=root,gunicorn.error,gunicorn.access
[handlers]
keys=console
[formatters]
keys=json,generic
[logger_root]
level=INFO
handlers=console
[handler_console]
class=StreamHandler
formatter=json
args=(sys.stdout, )
[formatter_generic]
format=%(asctime)s [%(process)d] [%(levelname)s] [%(name)s] %(message)s
class=logging.Formatter
[formatter_json]
class=loghandler.JsonFormatter
[logger_gunicorn.error]
level=ERROR
handlers=console
propagate=0
qualname=gunicorn.error
[logger_gunicorn.access]
handlers=console
propagate=0
qualname=gunicorn.access
level=DEBUG

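The config variants above are not referenced by hard-coded paths; entry points resolve them through the logfile_path helper added in util/log.py. A minimal sketch of loading the JSON variant (the workers later in this commit use this same fileConfig call; setting JSONLOG here is just for illustration):

import logging.config
import os

from util.log import logfile_path

os.environ['JSONLOG'] = 'true'   # or pass jsonfmt=True explicitly
# Resolves to 'conf/logging_json.conf', whose console handler uses the
# loghandler.JsonFormatter class added in this commit.
logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)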

@ -429,4 +429,3 @@ import endpoints.api.trigger
import endpoints.api.user
import endpoints.api.secscan
import endpoints.api.signing

loghandler.py Executable file

@ -0,0 +1,114 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import datetime
import json
import logging
import re
import traceback
LOG_FORMAT_REGEXP = re.compile(r'\((.+?)\)', re.IGNORECASE)
def _json_default(obj):
"""
Coerce everything to strings.
All objects representing time get output as ISO8601.
"""
if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
return obj.isoformat()
elif isinstance(obj, Exception):
return "Exception: %s" % str(obj)
return str(obj)
# skip natural LogRecord attributes
# http://docs.python.org/library/logging.html#logrecord-attributes
RESERVED_ATTRS = set([
'args', 'asctime', 'created', 'exc_info', 'exc_text', 'filename', 'funcName', 'levelname',
'levelno', 'lineno', 'module', 'msecs', 'message', 'msg', 'name', 'pathname', 'process',
'processName', 'relativeCreated', 'stack_info', 'thread', 'threadName'
])
class JsonFormatter(logging.Formatter):
"""
A custom formatter that serializes logging records as JSON strings.
Extra values are rendered with str() if they are not supported by the
default JSON encoder.
"""
def __init__(self, *args, **kwargs):
"""
:param json_default: a function for encoding non-standard objects
as outlined in http://docs.python.org/2/library/json.html
:param json_encoder: optional custom encoder
:param json_serializer: a :meth:`json.dumps`-compatible callable
that will be used to serialize the log record.
:param default_extra: default fields merged into every record
:param prefix_key: optional key under which extra fields are nested (default: "data")
"""
self.json_default = kwargs.pop("json_default", _json_default)
self.json_encoder = kwargs.pop("json_encoder", None)
self.json_serializer = kwargs.pop("json_serializer", json.dumps)
self.default_values = kwargs.pop("default_extra", {})
self.prefix_key = kwargs.pop("prefix_key", "data")
logging.Formatter.__init__(self, *args, **kwargs)
self._fmt_parameters = self._parse_format_string()
self._skip_fields = set(self._fmt_parameters)
self._skip_fields.update(RESERVED_ATTRS)
def _parse_format_string(self):
"""Parses format string looking for substitutions"""
standard_formatters = LOG_FORMAT_REGEXP
return standard_formatters.findall(self._fmt)
def add_fields(self, log_record, record, message_dict):
"""
Override this method to implement custom logic for adding fields.
"""
target = log_record
if self.prefix_key:
log_record[self.prefix_key] = {}
target = log_record[self.prefix_key]
for field, value in record.__dict__.iteritems():
if field in self._fmt_parameters and field in RESERVED_ATTRS:
log_record[field] = value
elif field not in RESERVED_ATTRS:
target[field] = value
target.update(message_dict)
target.update(self.default_values)
def format(self, record):
"""Formats a log record and serializes to json"""
message_dict = {}
if isinstance(record.msg, dict):
message_dict = record.msg
record.message = None
if "message" in message_dict:
record.message = message_dict.pop("message", "")
else:
record.message = record.getMessage()
# only format time if needed
if "asctime" in self._fmt_parameters:
record.asctime = self.formatTime(record, self.datefmt)
# Display formatted exception, but allow overriding it in the
# user-supplied dict.
if record.exc_info and not message_dict.get('exc_info'):
message_dict['exc_info'] = traceback.format_list(traceback.extract_tb(record.exc_info[2]))
log_record = {}
self.add_fields(log_record, record, message_dict)
return self.json_serializer(log_record, default=self.json_default, cls=self.json_encoder)

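For reference, a minimal sketch (not part of the commit) of attaching JsonFormatter to a handler by hand, mirroring what the [formatter_json] sections in the conf files do; the format string and logger name are illustrative:

import logging
import sys

from loghandler import JsonFormatter

handler = logging.StreamHandler(sys.stdout)
# Fields named in the format string stay at the top level of the JSON
# document; any other `extra` fields are nested under the "data" prefix key.
handler.setFormatter(JsonFormatter('%(asctime)s %(name)s %(levelname)s %(message)s'))

root = logging.getLogger()
root.addHandler(handler)
root.setLevel(logging.INFO)

# Emits a single JSON line roughly like:
# {"asctime": "...", "name": "example", "levelname": "INFO",
#  "message": "request-start", "data": {"request_id": "abc123"}}
logging.getLogger('example').info('request-start', extra={'request_id': 'abc123'})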

@ -11,8 +11,6 @@ from endpoints.appr import appr_bp, registry # registry needed to ensure routes
from endpoints.v1 import v1_bp
from endpoints.v2 import v2_bp
if os.environ.get('DEBUGLOG') == 'true':
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
application.register_blueprint(v1_bp, url_prefix='/v1')
application.register_blueprint(v2_bp, url_prefix='/v2')


@ -5,7 +5,5 @@ from app import app as application
from endpoints.secscan import secscan
if os.environ.get('DEBUGLOG') == 'true':
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
application.register_blueprint(secscan, url_prefix='/secscan')

util/log.py Normal file

@ -0,0 +1,46 @@
import os
def logfile_path(jsonfmt=False, debug=False):
"""
Returns a logging config file path chosen according to these rules:
- conf/logging_debug_json.conf # jsonfmt=true, debug=true
- conf/logging_json.conf # jsonfmt=true, debug=false
- conf/logging_debug.conf # jsonfmt=false, debug=true
- conf/logging.conf # jsonfmt=false, debug=false
Can also be selected via environment variables: JSONLOG=true, DEBUGLOG=true
"""
_json = ""
_debug = ""
if jsonfmt or os.getenv('JSONLOG', 'false').lower() == 'true':
_json = "_json"
if debug or os.getenv('DEBUGLOG', 'false').lower() == 'true':
_debug = "_debug"
return 'conf/logging%s%s.conf' % (_debug, _json)
def filter_logs(values, filtered_fields):
"""
Mutates a dict in place, given a list of key paths to filter.
e.g. with filtered_fields:
[{'key': ['k1', 'k2'], 'fn': lambda x: 'filtered'}]
and values:
{'k1': {'k2': 'some-secret'}, 'k3': 'some-value'}
the dict becomes:
{'k1': {'k2': 'filtered'}, 'k3': 'some-value'}
"""
for field in filtered_fields:
cdict = values
for key in field['key'][:-1]:
if key in cdict:
cdict = cdict[key]
last_key = field['key'][-1]
if last_key in cdict and cdict[last_key]:
cdict[last_key] = field['fn'](cdict[last_key])

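One property of filter_logs worth noting (a sketch, not part of the commit): key paths that are absent or empty in the dict are simply skipped, so callers can apply one rule list to arbitrary request payloads:

from util.log import filter_logs

# Absent (or empty) keys are left untouched rather than raising.
values = {'unrelated': 'kept'}
filter_logs(values, [{'key': ['user', 'password'], 'fn': lambda x: '[FILTERED]'}])
# values is still {'unrelated': 'kept'}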

@ -0,0 +1,41 @@
import pytest
from util.log import logfile_path, filter_logs
from app import FILTERED_VALUES
def test_filter_logs():
values = {
'user': {
'password': "toto"
},
'blob': '1234567890asdfewkqresfdsfewfdsfd',
'unfiltered': 'foo'
}
filter_logs(values, FILTERED_VALUES)
assert values == {'user': {'password': '[FILTERED]'}, 'blob': '12345678', 'unfiltered': "foo"}
@pytest.mark.parametrize('debug,jsonfmt,expected', [
(False, False, "conf/logging.conf"),
(False, True, "conf/logging_json.conf"),
(True, False, "conf/logging_debug.conf"),
(True, True, "conf/logging_debug_json.conf"),
])
def test_logfile_path(debug, jsonfmt, expected, monkeypatch):
assert logfile_path(jsonfmt=jsonfmt, debug=debug) == expected
@pytest.mark.parametrize('debug,jsonfmt,expected', [
("false", "false", "conf/logging.conf"),
("false", "true", "conf/logging_json.conf"),
("true", "false", "conf/logging_debug.conf"),
("true", "true", "conf/logging_debug_json.conf"),
])
def test_logfile_path_env(debug, jsonfmt, expected, monkeypatch):
monkeypatch.setenv("DEBUGLOG", debug)
monkeypatch.setenv("JSONLOG", jsonfmt)
assert logfile_path() == expected
def test_logfile_path_default():
assert logfile_path() == "conf/logging.conf"


@ -6,7 +6,5 @@ from app import app as application
from endpoints.verbs import verbs
if os.environ.get('DEBUGLOG') == 'true':
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
application.register_blueprint(verbs, url_prefix='/c1')

web.py

@ -13,8 +13,6 @@ from endpoints.web import web
from endpoints.webhooks import webhooks
from endpoints.wellknown import wellknown
if os.environ.get('DEBUGLOG') == 'true':
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
application.register_blueprint(web)
application.register_blueprint(githubtrigger, url_prefix='/oauth2')


@ -7,7 +7,7 @@ from app import app, storage
from data.database import UseThenDisconnect, BlobUpload
from data import model
from workers.worker import Worker
from util.log import logfile_path
logger = logging.getLogger(__name__)
@ -51,6 +51,6 @@ class BlobUploadCleanupWorker(Worker):
if __name__ == "__main__":
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
worker = BlobUploadCleanupWorker()
worker.start()


@ -3,6 +3,7 @@ import time
from app import app, storage, chunk_cleanup_queue
from workers.queueworker import QueueWorker, JobException
from util.log import logfile_path
logger = logging.getLogger(__name__)
@ -26,7 +27,7 @@ class ChunkCleanupWorker(QueueWorker):
if __name__ == "__main__":
logging.config.fileConfig('conf/logging.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)
engines = set([config[0] for config in app.config.get('DISTRIBUTED_STORAGE_CONFIG', {}).values()])
if 'SwiftStorage' not in engines:


@ -7,6 +7,7 @@ from data.database import UseThenDisconnect
from data import model
from data.model.image import total_image_count
from util.locking import GlobalLock, LockNotAcquiredException
from util.log import logfile_path
from workers.worker import Worker
logger = logging.getLogger(__name__)
@ -44,7 +45,7 @@ class GlobalPrometheusStatsWorker(Worker):
def main():
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
if not app.config.get('PROMETHEUS_AGGREGATOR_URL'):
logger.debug('Prometheus not enabled; skipping global stats reporting')


@ -13,6 +13,7 @@ from data.model.log import (get_stale_logs, get_stale_logs_start_id,
get_stale_logs_cutoff_id, delete_stale_logs)
from data.userfiles import DelegateUserfiles
from util.locking import GlobalLock, LockNotAcquiredException
from util.log import logfile_path
from util.streamingjsonencoder import StreamingJSONEncoder
from workers.worker import Worker
@ -102,7 +103,7 @@ def log_dict(log):
def main():
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
if not features.ACTION_LOG_ROTATION or None in [SAVE_PATH, SAVE_LOCATION]:
logger.debug('Action log rotation worker not enabled; skipping')


@ -13,6 +13,7 @@ from data.model.image import (get_images_eligible_for_scan, get_image_pk_field,
from util.secscan.api import SecurityConfigValidator
from util.secscan.analyzer import LayerAnalyzer, PreemptedException
from util.migrate.allocator import yield_random_entries
from util.log import logfile_path
from endpoints.v2 import v2_bp
@ -106,6 +107,6 @@ if __name__ == '__main__':
while True:
time.sleep(100000)
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
worker = SecurityWorker()
worker.start()


@ -1,3 +1,4 @@
import os
import logging
import features
import time
@ -6,6 +7,7 @@ from app import app, storage, image_replication_queue
from data.database import CloseForLongOperation
from data import model
from workers.queueworker import QueueWorker, WorkerUnhealthyException
from util.log import logfile_path
logger = logging.getLogger(__name__)
@ -78,7 +80,7 @@ class StorageReplicationWorker(QueueWorker):
if __name__ == "__main__":
logging.config.fileConfig('conf/logging.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)
has_local_storage = False


@ -7,6 +7,7 @@ from app import app, authentication
from data.users.teamsync import sync_teams_to_groups
from workers.worker import Worker
from util.timedeltastring import convert_to_timedelta
from util.log import logfile_path
logger = logging.getLogger(__name__)
@ -25,7 +26,7 @@ class TeamSynchronizationWorker(Worker):
def main():
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
if not features.TEAM_SYNCING or not authentication.federated_service:
logger.debug('Team syncing is disabled; sleeping')


@ -13,7 +13,7 @@ from raven import Client
from app import app
from data.database import UseThenDisconnect
from util.log import logfile_path
logger = logging.getLogger(__name__)
@ -63,7 +63,7 @@ class Worker(object):
pass
def start(self):
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
if not app.config.get('SETUP_COMPLETE', False):
logger.info('Product setup is not yet complete; skipping worker startup')