# Monitoring

Implement comprehensive monitoring for DDEX Suite applications to ensure reliability and performance.

## Overview

Effective monitoring provides:

- Real-time performance metrics
- Error tracking and alerting
- Usage analytics and insights
- System health monitoring
- Proactive issue detection

## Application Performance Monitoring (APM)

### New Relic Integration

```typescript
import newrelic from 'newrelic';
import { DDEXParser } from 'ddex-parser';
import { DDEXBuilder } from 'ddex-builder';

class MonitoredDDEXService {
  private parser = new DDEXParser();
  private builder = new DDEXBuilder();

  async parseWithMonitoring(xmlContent: string) {
    return newrelic.startSegment('ddex-parse', true, async () => {
      const startTime = Date.now();
      try {
        const result = await this.parser.parse(xmlContent);

        // Custom metrics
        newrelic.recordMetric('Custom/DDEX/ParseSuccess', 1);
        newrelic.recordMetric('Custom/DDEX/ParseDuration', Date.now() - startTime);
        newrelic.recordMetric('Custom/DDEX/FileSize', xmlContent.length);

        // Custom attributes
        newrelic.addCustomAttribute('ddex_version', result.version);
        newrelic.addCustomAttribute('release_count', result.flat.releases.length);
        newrelic.addCustomAttribute('track_count', result.flat.tracks.length);

        return result;
      } catch (error) {
        newrelic.recordMetric('Custom/DDEX/ParseError', 1);
        newrelic.noticeError(error, {
          fileSize: xmlContent.length,
          operation: 'parse'
        });
        throw error;
      }
    });
  }

  async buildWithMonitoring(data: any, options: any = {}) {
    return newrelic.startSegment('ddex-build', true, async () => {
      const startTime = Date.now();
      try {
        const xml = await this.builder.build(data, options);

        // Success metrics
        newrelic.recordMetric('Custom/DDEX/BuildSuccess', 1);
        newrelic.recordMetric('Custom/DDEX/BuildDuration', Date.now() - startTime);
        newrelic.recordMetric('Custom/DDEX/OutputSize', xml.length);

        return xml;
      } catch (error) {
        newrelic.recordMetric('Custom/DDEX/BuildError', 1);
        newrelic.noticeError(error, {
          dataSize: JSON.stringify(data).length,
          operation: 'build',
          validationLevel: options.validationLevel
        });
        throw error;
      }
    });
  }
}
```
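
If the service sits behind Express, a minimal wiring sketch might look like this (assuming the New Relic agent is loaded via its standard `newrelic.js` config file; the `/parse` route, body-size limit, and port are illustrative):

```typescript
import express from 'express';

const app = express();
// Accept raw XML bodies for parsing
app.use(express.text({ type: ['text/xml', 'application/xml'], limit: '50mb' }));

const service = new MonitoredDDEXService();

app.post('/parse', async (req, res) => {
  try {
    const result = await service.parseWithMonitoring(req.body);
    res.json({ releases: result.flat.releases.length });
  } catch {
    // The failure is already recorded in New Relic; return a generic error
    res.status(422).json({ error: 'Failed to parse DDEX message' });
  }
});

app.listen(3000);
```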

### Datadog Integration

```python
from datadog import initialize, statsd
from ddex_parser import DDEXParser
from ddex_builder import DDEXBuilder
import time
import logging

# Initialize Datadog
initialize(api_key='your-api-key', app_key='your-app-key')

class MonitoredDDEXProcessor:
    def __init__(self):
        self.parser = DDEXParser()
        self.builder = DDEXBuilder()
        self.logger = logging.getLogger(__name__)

    @statsd.timed('ddex.parse.duration')
    def parse_with_monitoring(self, xml_content: str):
        start_time = time.time()
        statsd.increment('ddex.parse.attempts')
        try:
            result = self.parser.parse(xml_content)

            # A single tagged increment avoids double counting successes
            tags = [
                f'ddex_version:{result.version}',
                f'has_releases:{len(result.flat.releases) > 0}',
                f'file_size_category:{self._categorize_file_size(len(xml_content))}'
            ]
            statsd.increment('ddex.parse.success', tags=tags)
            statsd.histogram('ddex.file_size', len(xml_content))
            statsd.histogram('ddex.releases_count', len(result.flat.releases))
            statsd.histogram('ddex.tracks_count', len(result.flat.tracks))

            self.logger.info("Successfully parsed DDEX", extra={
                'ddex_version': result.version,
                'releases_count': len(result.flat.releases),
                'tracks_count': len(result.flat.tracks),
                'processing_time_ms': (time.time() - start_time) * 1000
            })
            return result
        except Exception as e:
            statsd.increment('ddex.parse.error', tags=[f'error_type:{type(e).__name__}'])
            self.logger.error("Failed to parse DDEX", extra={
                'error': str(e),
                'error_type': type(e).__name__,
                'file_size': len(xml_content)
            }, exc_info=True)
            raise

    @statsd.timed('ddex.build.duration')
    def build_with_monitoring(self, data, **options):
        statsd.increment('ddex.build.attempts')
        try:
            xml = self.builder.build(data, **options)

            # Success metrics, tagged with the validation level when present
            tags = []
            if 'validation_level' in options:
                tags.append(f'validation_level:{options["validation_level"]}')
            statsd.increment('ddex.build.success', tags=tags)
            statsd.histogram('ddex.output_size', len(xml))
            return xml
        except Exception as e:
            statsd.increment('ddex.build.error', tags=[f'error_type:{type(e).__name__}'])
            self.logger.error("Failed to build DDEX", extra={
                'error': str(e),
                'error_type': type(e).__name__,
                'data_size': len(str(data))
            }, exc_info=True)
            raise

    def _categorize_file_size(self, size):
        if size < 10000:
            return 'small'
        elif size < 100000:
            return 'medium'
        elif size < 1000000:
            return 'large'
        else:
            return 'xlarge'
```
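
On Node.js, comparable StatsD metrics can be emitted with a community client such as `hot-shots` (a sketch under that assumption; `hot-shots` is not a DDEX Suite dependency):

```typescript
import StatsD from 'hot-shots';
import { DDEXParser } from 'ddex-parser';

// The prefix yields metric names like ddex.parse.attempts
const statsd = new StatsD({ prefix: 'ddex.' });
const parser = new DDEXParser();

export async function parseWithStatsd(xmlContent: string) {
  statsd.increment('parse.attempts');
  const start = Date.now();
  try {
    const result = await parser.parse(xmlContent);
    // (stat, value, sampleRate, tags)
    statsd.increment('parse.success', 1, 1, [`ddex_version:${result.version}`]);
    statsd.timing('parse.duration', Date.now() - start);
    return result;
  } catch (error) {
    statsd.increment('parse.error', 1, 1, [`error_type:${(error as Error).constructor.name}`]);
    throw error;
  }
}
```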

### Prometheus Metrics

```python
from prometheus_client import Counter, Histogram, Gauge, Info, start_http_server
from ddex_parser import DDEXParser
from ddex_builder import DDEXBuilder

# Define metrics
ddex_operations_total = Counter(
    'ddex_operations_total',
    'Total number of DDEX operations',
    ['operation', 'status', 'ddex_version']
)

ddex_processing_duration_seconds = Histogram(
    'ddex_processing_duration_seconds',
    'Time spent processing DDEX files',
    ['operation'],
    buckets=[0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
)

ddex_file_size_bytes = Histogram(
    'ddex_file_size_bytes',
    'Size of DDEX files processed',
    ['operation'],
    buckets=[1000, 10000, 100000, 1000000, 10000000]
)

ddex_releases_processed = Counter(
    'ddex_releases_processed_total',
    'Total number of releases processed'
)

ddex_tracks_processed = Counter(
    'ddex_tracks_processed_total',
    'Total number of tracks processed'
)

ddex_active_operations = Gauge(
    'ddex_active_operations',
    'Number of currently active DDEX operations',
    ['operation']
)

ddex_info = Info(
    'ddex_suite_info',
    'Information about DDEX Suite version and configuration'
)

class PrometheusMonitoredDDEXService:
    def __init__(self):
        self.parser = DDEXParser()
        self.builder = DDEXBuilder()

        # Set static info
        ddex_info.info({
            'version': '0.2.5',
            'parser_features': 'graph,flat,streaming',
            'builder_features': 'deterministic,validation,batch'
        })

    def parse_with_metrics(self, xml_content: str):
        ddex_active_operations.labels(operation='parse').inc()
        with ddex_processing_duration_seconds.labels(operation='parse').time():
            try:
                result = self.parser.parse(xml_content)

                # Success metrics
                ddex_operations_total.labels(
                    operation='parse',
                    status='success',
                    ddex_version=result.version
                ).inc()
                ddex_file_size_bytes.labels(operation='parse').observe(len(xml_content))
                ddex_releases_processed.inc(len(result.flat.releases))
                ddex_tracks_processed.inc(len(result.flat.tracks))

                return result
            except Exception:
                ddex_operations_total.labels(
                    operation='parse',
                    status='error',
                    ddex_version='unknown'
                ).inc()
                raise
            finally:
                ddex_active_operations.labels(operation='parse').dec()

    def build_with_metrics(self, data, **options):
        ddex_active_operations.labels(operation='build').inc()
        with ddex_processing_duration_seconds.labels(operation='build').time():
            try:
                xml = self.builder.build(data, **options)

                # Success metrics
                ddex_operations_total.labels(
                    operation='build',
                    status='success',
                    ddex_version=options.get('version', '4.3')
                ).inc()
                ddex_file_size_bytes.labels(operation='build').observe(len(xml))

                # Count releases in input data
                if 'releases' in data:
                    ddex_releases_processed.inc(len(data['releases']))

                return xml
            except Exception:
                ddex_operations_total.labels(
                    operation='build',
                    status='error',
                    ddex_version='unknown'
                ).inc()
                raise
            finally:
                ddex_active_operations.labels(operation='build').dec()

# Start Prometheus metrics server
start_http_server(8000)
```
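
A Node.js equivalent is possible with the `prom-client` package (a sketch; metric names mirror the Python example above, and `prom-client` is an assumption rather than a DDEX Suite dependency):

```typescript
import client from 'prom-client';
import express from 'express';
import { DDEXParser } from 'ddex-parser';

const operationsTotal = new client.Counter({
  name: 'ddex_operations_total',
  help: 'Total number of DDEX operations',
  labelNames: ['operation', 'status', 'ddex_version'],
});

const processingDuration = new client.Histogram({
  name: 'ddex_processing_duration_seconds',
  help: 'Time spent processing DDEX files',
  labelNames: ['operation'],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10],
});

const parser = new DDEXParser();

export async function parseWithMetrics(xmlContent: string) {
  // startTimer() returns a function that records the elapsed time
  const endTimer = processingDuration.labels('parse').startTimer();
  try {
    const result = await parser.parse(xmlContent);
    operationsTotal.labels('parse', 'success', result.version).inc();
    return result;
  } catch (error) {
    operationsTotal.labels('parse', 'error', 'unknown').inc();
    throw error;
  } finally {
    endTimer();
  }
}

// Expose /metrics for Prometheus to scrape
const app = express();
app.get('/metrics', async (_req, res) => {
  res.set('Content-Type', client.register.contentType);
  res.end(await client.register.metrics());
});
app.listen(8000);
```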

## Structured Logging

### Python Logging

```python
import json
import logging
import time
from datetime import datetime
from typing import Optional

from ddex_parser import DDEXParser
from ddex_builder import DDEXBuilder

# Configure structured logging
logging.basicConfig(
    level=logging.INFO,
    format='%(message)s',  # We'll format JSON ourselves
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('/var/log/ddex-suite.log')
    ]
)

class StructuredLogger:
    def __init__(self, name):
        self.logger = logging.getLogger(name)

    def _log_structured(self, level, message, **kwargs):
        log_entry = {
            'timestamp': datetime.utcnow().isoformat(),
            'level': level,
            'message': message,
            'service': 'ddex-suite',
            **kwargs
        }
        getattr(self.logger, level.lower())(json.dumps(log_entry))

    def info(self, message, **kwargs):
        self._log_structured('INFO', message, **kwargs)

    def error(self, message, **kwargs):
        self._log_structured('ERROR', message, **kwargs)

    def warning(self, message, **kwargs):
        self._log_structured('WARNING', message, **kwargs)

class LoggedDDEXProcessor:
    def __init__(self):
        self.parser = DDEXParser()
        self.builder = DDEXBuilder()
        self.logger = StructuredLogger(__name__)

    def parse_with_logging(self, xml_content: str, request_id: Optional[str] = None):
        start_time = time.time()
        self.logger.info("Starting DDEX parse operation",
            request_id=request_id,
            operation='parse',
            file_size=len(xml_content)
        )
        try:
            result = self.parser.parse(xml_content)
            processing_time = (time.time() - start_time) * 1000
            self.logger.info("Successfully parsed DDEX",
                request_id=request_id,
                operation='parse',
                ddex_version=result.version,
                releases_count=len(result.flat.releases),
                tracks_count=len(result.flat.tracks),
                processing_time_ms=processing_time,
                file_size=len(xml_content)
            )
            return result
        except Exception as e:
            processing_time = (time.time() - start_time) * 1000
            self.logger.error("Failed to parse DDEX",
                request_id=request_id,
                operation='parse',
                error=str(e),
                error_type=type(e).__name__,
                processing_time_ms=processing_time,
                file_size=len(xml_content)
            )
            raise

    def build_with_logging(self, data, request_id: Optional[str] = None, **options):
        start_time = time.time()
        data_size = len(json.dumps(data, default=str))
        self.logger.info("Starting DDEX build operation",
            request_id=request_id,
            operation='build',
            data_size=data_size,
            validation_level=options.get('validation_level', 'standard')
        )
        try:
            xml = self.builder.build(data, **options)
            processing_time = (time.time() - start_time) * 1000
            self.logger.info("Successfully built DDEX",
                request_id=request_id,
                operation='build',
                data_size=data_size,
                output_size=len(xml),
                processing_time_ms=processing_time,
                validation_level=options.get('validation_level', 'standard')
            )
            return xml
        except Exception as e:
            processing_time = (time.time() - start_time) * 1000
            self.logger.error("Failed to build DDEX",
                request_id=request_id,
                operation='build',
                error=str(e),
                error_type=type(e).__name__,
                processing_time_ms=processing_time,
                data_size=data_size,
                validation_level=options.get('validation_level', 'standard')
            )
            raise
```

### JavaScript Logging

```typescript
import winston from 'winston';
import { DDEXParser } from 'ddex-parser';
import { DDEXBuilder } from 'ddex-builder';

// Configure Winston logger
const logger = winston.createLogger({
  level: 'info',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.errors({ stack: true }),
    winston.format.json()
  ),
  defaultMeta: { service: 'ddex-suite' },
  transports: [
    new winston.transports.File({ filename: 'error.log', level: 'error' }),
    new winston.transports.File({ filename: 'combined.log' }),
    new winston.transports.Console({
      format: winston.format.combine(
        winston.format.colorize(),
        winston.format.simple()
      )
    })
  ]
});

export class LoggedDDEXService {
  private parser = new DDEXParser();
  private builder = new DDEXBuilder();

  async parseWithLogging(xmlContent: string, requestId?: string): Promise<any> {
    const startTime = Date.now();

    logger.info('Starting DDEX parse operation', {
      requestId,
      operation: 'parse',
      fileSize: xmlContent.length
    });

    try {
      const result = await this.parser.parse(xmlContent);
      const processingTime = Date.now() - startTime;

      logger.info('Successfully parsed DDEX', {
        requestId,
        operation: 'parse',
        ddexVersion: result.version,
        releasesCount: result.flat.releases.length,
        tracksCount: result.flat.tracks.length,
        processingTimeMs: processingTime,
        fileSize: xmlContent.length
      });

      return result;
    } catch (error) {
      const processingTime = Date.now() - startTime;

      logger.error('Failed to parse DDEX', {
        requestId,
        operation: 'parse',
        error: error.message,
        errorType: error.constructor.name,
        processingTimeMs: processingTime,
        fileSize: xmlContent.length,
        stack: error.stack
      });

      throw error;
    }
  }

  async buildWithLogging(data: any, requestId?: string, options: any = {}): Promise<string> {
    const startTime = Date.now();
    const dataSize = JSON.stringify(data).length;

    logger.info('Starting DDEX build operation', {
      requestId,
      operation: 'build',
      dataSize,
      validationLevel: options.validationLevel || 'standard'
    });

    try {
      const xml = await this.builder.build(data, options);
      const processingTime = Date.now() - startTime;

      logger.info('Successfully built DDEX', {
        requestId,
        operation: 'build',
        dataSize,
        outputSize: xml.length,
        processingTimeMs: processingTime,
        validationLevel: options.validationLevel || 'standard'
      });

      return xml;
    } catch (error) {
      const processingTime = Date.now() - startTime;

      logger.error('Failed to build DDEX', {
        requestId,
        operation: 'build',
        error: error.message,
        errorType: error.constructor.name,
        processingTimeMs: processingTime,
        dataSize,
        validationLevel: options.validationLevel || 'standard',
        stack: error.stack
      });

      throw error;
    }
  }
}
```
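
For `requestId` to correlate the start, success, and error lines of one request, generate it once per request and thread it through (a sketch assuming an Express app; `crypto.randomUUID()` requires Node 16+):

```typescript
import { randomUUID } from 'crypto';
import express from 'express';

const app = express();
app.use(express.text({ type: ['text/xml', 'application/xml'] }));

const service = new LoggedDDEXService();

app.post('/parse', async (req, res) => {
  // Honor an upstream request ID if a proxy supplied one, otherwise mint one
  const requestId = (req.headers['x-request-id'] as string) ?? randomUUID();
  try {
    const result = await service.parseWithLogging(req.body, requestId);
    res.json({ requestId, releases: result.flat.releases.length });
  } catch {
    res.status(422).json({ requestId, error: 'parse failed' });
  }
});
```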

## Error Tracking

### Sentry Integration

```typescript
import * as Sentry from '@sentry/node';
import { DDEXParser } from 'ddex-parser';
import { DDEXBuilder } from 'ddex-builder';

Sentry.init({
  dsn: process.env.SENTRY_DSN,
  environment: process.env.NODE_ENV,
  tracesSampleRate: 1.0,
});

export class SentryMonitoredDDEXService {
  private parser = new DDEXParser();
  private builder = new DDEXBuilder();

  async parseWithErrorTracking(xmlContent: string, userId?: string) {
    // startTransaction (the @sentry/node v7 tracing API) returns a handle
    // that must be finished explicitly
    const transaction = Sentry.startTransaction({
      name: 'ddex-parse',
      op: 'ddex.parse'
    });

    Sentry.setContext('ddex_operation', {
      operation: 'parse',
      fileSize: xmlContent.length,
      timestamp: new Date().toISOString()
    });

    if (userId) {
      Sentry.setUser({ id: userId });
    }

    try {
      const result = await this.parser.parse(xmlContent);

      // Add success context
      Sentry.setContext('ddex_result', {
        version: result.version,
        releasesCount: result.flat.releases.length,
        tracksCount: result.flat.tracks.length
      });

      transaction.setStatus('ok');
      return result;
    } catch (error) {
      // Add error context
      Sentry.setContext('ddex_error', {
        fileSize: xmlContent.length,
        errorType: error.constructor.name,
        operation: 'parse'
      });

      // Add breadcrumb
      Sentry.addBreadcrumb({
        message: 'DDEX parse attempt failed',
        category: 'ddex',
        level: 'error',
        data: {
          fileSize: xmlContent.length,
          errorMessage: error.message
        }
      });

      transaction.setStatus('internal_error');
      Sentry.captureException(error);
      throw error;
    } finally {
      transaction.finish();
    }
  }

  async buildWithErrorTracking(data: any, userId?: string, options: any = {}) {
    const transaction = Sentry.startTransaction({
      name: 'ddex-build',
      op: 'ddex.build'
    });
    const dataSize = JSON.stringify(data).length;

    Sentry.setContext('ddex_operation', {
      operation: 'build',
      dataSize,
      validationLevel: options.validationLevel,
      timestamp: new Date().toISOString()
    });

    if (userId) {
      Sentry.setUser({ id: userId });
    }

    try {
      const xml = await this.builder.build(data, options);

      Sentry.setContext('ddex_result', {
        outputSize: xml.length,
        validationLevel: options.validationLevel
      });

      transaction.setStatus('ok');
      return xml;
    } catch (error) {
      Sentry.setContext('ddex_error', {
        dataSize,
        validationLevel: options.validationLevel,
        errorType: error.constructor.name,
        operation: 'build'
      });

      Sentry.addBreadcrumb({
        message: 'DDEX build attempt failed',
        category: 'ddex',
        level: 'error',
        data: {
          dataSize,
          errorMessage: error.message,
          validationLevel: options.validationLevel
        }
      });

      transaction.setStatus('internal_error');
      Sentry.captureException(error);
      throw error;
    } finally {
      transaction.finish();
    }
  }
}
```
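
If the service runs behind Express, Sentry's request and error handlers catch anything that escapes the per-operation handling above (a sketch using the `@sentry/node` v7 `Handlers` API; newer major versions expose this differently):

```typescript
import express from 'express';
import * as Sentry from '@sentry/node';

const app = express();

// Must be registered first so every request gets its own Sentry scope
app.use(Sentry.Handlers.requestHandler());
app.use(Sentry.Handlers.tracingHandler());

// ... routes using SentryMonitoredDDEXService go here ...

// Must come after all routes and before any other error middleware
app.use(Sentry.Handlers.errorHandler());
```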

## Health Checks

```typescript
import express from 'express';
import { DDEXParser } from 'ddex-parser';
import { DDEXBuilder } from 'ddex-builder';

const app = express();
const parser = new DDEXParser();
const builder = new DDEXBuilder();

// Basic health check
app.get('/health', (req, res) => {
  res.json({
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    memory: process.memoryUsage(),
    version: process.env.npm_package_version
  });
});

// Detailed health check with dependencies
// (checkDatabaseHealth and checkDiskHealth are environment-specific; see the sketches below)
app.get('/health/detailed', async (req, res) => {
  const checks = {
    ddex_parser: await checkParserHealth(),
    ddex_builder: await checkBuilderHealth(),
    database: await checkDatabaseHealth(),
    memory: checkMemoryHealth(),
    disk: await checkDiskHealth()
  };

  const overallHealthy = Object.values(checks).every(check => check.healthy);
  const statusCode = overallHealthy ? 200 : 503;

  res.status(statusCode).json({
    status: overallHealthy ? 'healthy' : 'unhealthy',
    timestamp: new Date().toISOString(),
    checks
  });
});

async function checkParserHealth() {
  try {
    // The XML declaration must be the very first characters of the string,
    // so the template literal starts immediately with <?xml
    const testXML = `<?xml version="1.0" encoding="UTF-8"?>
<NewReleaseMessage xmlns="http://ddex.net/xml/ern/43">
  <MessageHeader>
    <MessageThreadId>test</MessageThreadId>
    <MessageId>test</MessageId>
    <MessageSender>
      <PartyId>test</PartyId>
    </MessageSender>
    <MessageRecipient>
      <PartyId>test</PartyId>
    </MessageRecipient>
    <MessageCreatedDateTime>2023-01-01T00:00:00Z</MessageCreatedDateTime>
    <MessageSchemaVersionId>ern/43</MessageSchemaVersionId>
  </MessageHeader>
  <UpdateIndicator>OriginalMessage</UpdateIndicator>
  <CatalogTransfer>
    <CatalogTransferType>FullCatalog</CatalogTransferType>
  </CatalogTransfer>
  <WorkList/>
  <CueSheetList/>
</NewReleaseMessage>`;

    const startTime = Date.now();
    await parser.parse(testXML);
    const responseTime = Date.now() - startTime;

    return {
      healthy: true,
      responseTimeMs: responseTime,
      lastChecked: new Date().toISOString()
    };
  } catch (error) {
    return {
      healthy: false,
      error: error.message,
      lastChecked: new Date().toISOString()
    };
  }
}

async function checkBuilderHealth() {
  try {
    const testData = {
      releases: [{
        id: 'test-release',
        title: 'Health Check Release',
        artist: 'Test Artist'
      }]
    };

    const startTime = Date.now();
    await builder.build(testData);
    const responseTime = Date.now() - startTime;

    return {
      healthy: true,
      responseTimeMs: responseTime,
      lastChecked: new Date().toISOString()
    };
  } catch (error) {
    return {
      healthy: false,
      error: error.message,
      lastChecked: new Date().toISOString()
    };
  }
}

function checkMemoryHealth() {
  const usage = process.memoryUsage();
  const maxHeapSize = 512 * 1024 * 1024; // 512MB threshold
  const healthy = usage.heapUsed < maxHeapSize;

  return {
    healthy,
    heapUsedMB: Math.round(usage.heapUsed / 1024 / 1024),
    heapTotalMB: Math.round(usage.heapTotal / 1024 / 1024),
    externalMB: Math.round(usage.external / 1024 / 1024),
    thresholdMB: Math.round(maxHeapSize / 1024 / 1024),
    lastChecked: new Date().toISOString()
  };
}
```
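
The `checkDatabaseHealth` and `checkDiskHealth` functions referenced above depend on your stack. Minimal sketches, assuming a hypothetical `db.ping()` on your database client and Node's `fs/promises` `statfs` (available in Node 18.15+):

```typescript
import { statfs } from 'fs/promises';

// Hypothetical: replace db.ping() with your own database client's check
async function checkDatabaseHealth() {
  try {
    await db.ping();
    return { healthy: true, lastChecked: new Date().toISOString() };
  } catch (error) {
    return { healthy: false, error: error.message, lastChecked: new Date().toISOString() };
  }
}

async function checkDiskHealth() {
  try {
    const stats = await statfs('/');
    const freeBytes = stats.bsize * stats.bavail;
    const minFreeBytes = 1024 * 1024 * 1024; // require at least 1GB free
    return {
      healthy: freeBytes > minFreeBytes,
      freeMB: Math.round(freeBytes / 1024 / 1024),
      lastChecked: new Date().toISOString()
    };
  } catch (error) {
    return { healthy: false, error: error.message, lastChecked: new Date().toISOString() };
  }
}
```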

## Dashboard Configuration

### Grafana Dashboard JSON

```json
{
  "dashboard": {
    "id": null,
    "title": "DDEX Suite Monitoring",
    "tags": ["ddex", "monitoring"],
    "timezone": "browser",
    "panels": [
      {
        "id": 1,
        "title": "Operations Per Second",
        "type": "stat",
        "targets": [
          {
            "expr": "rate(ddex_operations_total[5m])",
            "legendFormat": "{{operation}}"
          }
        ],
        "fieldConfig": {
          "defaults": {
            "unit": "ops"
          }
        }
      },
      {
        "id": 2,
        "title": "Processing Duration",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, rate(ddex_processing_duration_seconds_bucket[5m]))",
            "legendFormat": "95th percentile"
          },
          {
            "expr": "histogram_quantile(0.50, rate(ddex_processing_duration_seconds_bucket[5m]))",
            "legendFormat": "Median"
          }
        ]
      },
      {
        "id": 3,
        "title": "Error Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(ddex_operations_total{status=\"error\"}[5m]) / rate(ddex_operations_total[5m])",
            "legendFormat": "Error Rate"
          }
        ],
        "yAxes": [
          {
            "unit": "percentunit",
            "max": 1,
            "min": 0
          }
        ]
      },
      {
        "id": 4,
        "title": "Active Operations",
        "type": "graph",
        "targets": [
          {
            "expr": "ddex_active_operations",
            "legendFormat": "{{operation}}"
          }
        ]
      },
      {
        "id": 5,
        "title": "File Size Distribution",
        "type": "heatmap",
        "targets": [
          {
            "expr": "rate(ddex_file_size_bytes_bucket[5m])",
            "legendFormat": "{{le}}"
          }
        ]
      }
    ],
    "time": {
      "from": "now-1h",
      "to": "now"
    },
    "refresh": "10s"
  }
}
```
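
To provision this dashboard programmatically, it can be pushed through Grafana's HTTP API (a sketch; the file name, Grafana URL, and `GRAFANA_TOKEN` variable are assumptions, and `fetch` is global in Node 18+):

```typescript
import { readFile } from 'fs/promises';

async function uploadDashboard() {
  // The saved JSON already has a top-level "dashboard" key, so only
  // the overwrite flag needs to be added to the payload
  const payload = JSON.parse(await readFile('ddex-dashboard.json', 'utf8'));

  const response = await fetch('http://localhost:3000/api/dashboards/db', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${process.env.GRAFANA_TOKEN}`,
    },
    body: JSON.stringify({ ...payload, overwrite: true }),
  });

  if (!response.ok) {
    throw new Error(`Dashboard upload failed: ${response.status}`);
  }
}
```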

## Alerting Rules

### Prometheus Alerting

```yaml
# alerts.yml
groups:
  - name: ddex-suite
    rules:
      - alert: HighErrorRate
        expr: rate(ddex_operations_total{status="error"}[5m]) / rate(ddex_operations_total[5m]) > 0.1
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High DDEX error rate detected"
          description: "Error rate is {{ $value | humanizePercentage }} for the last 5 minutes"

      - alert: SlowProcessing
        expr: histogram_quantile(0.95, rate(ddex_processing_duration_seconds_bucket[5m])) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "DDEX processing is slow"
          description: "95th percentile processing time is {{ $value }}s"

      - alert: ServiceDown
        expr: up{job="ddex-suite"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "DDEX Suite service is down"
          description: "Service has been down for more than 1 minute"

      - alert: HighMemoryUsage
        expr: process_resident_memory_bytes{job="ddex-suite"} > 1e+9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage"
          description: "Memory usage is {{ $value | humanize1024 }}B"
```

## Best Practices

- **Metrics Collection**: Collect both technical and business metrics
- **Structured Logging**: Use JSON format for easy parsing and analysis
- **Error Context**: Include relevant context in error reports
- **Health Checks**: Implement comprehensive health checks
- **Alerting**: Set up proactive alerts for critical issues
- **Dashboard Design**: Create clear, actionable dashboards
- **Retention Policies**: Configure appropriate data retention
- **Privacy**: Avoid logging sensitive data such as API keys and personal information (see the sanitization sketch below)
- **Performance Impact**: Monitor the overhead of the monitoring itself
- **Documentation**: Document your metrics and alerting strategy
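
For the privacy point above, a small sanitizer applied to log metadata before it is emitted goes a long way (a sketch; the key list is illustrative and should be extended for your data):

```typescript
// Hypothetical helper: redact sensitive fields before logging
const SENSITIVE_KEYS = ['apiKey', 'api_key', 'password', 'token', 'email'];

function sanitizeForLogging(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(sanitizeForLogging);
  }
  if (value !== null && typeof value === 'object') {
    return Object.fromEntries(
      Object.entries(value as Record<string, unknown>).map(([key, v]) =>
        SENSITIVE_KEYS.includes(key) ? [key, '[REDACTED]'] : [key, sanitizeForLogging(v)]
      )
    );
  }
  return value;
}

// Usage: logger.info('Starting build', sanitizeForLogging(meta));
```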