workflow:
  id: openshift-monitoring-and-remediation
  name: OpenShift Monitoring and Remediation
  description: |
    Comprehensive OpenShift monitoring workflow that demonstrates:
    - Getting cluster information (projects, pods, routes, deployment configs)
    - Monitoring pod health and events
    - Automatic remediation actions (restart pods, scale deployments)
    - Alert-driven workflows for OpenShift clusters
  triggers:
    - type: manual
    - type: alert
      filters:
        - key: source
          value: openshift
        - key: severity
          value: critical
  steps:
    # Get all OpenShift projects
    - name: get-projects
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          command_type: get_projects

    # Get all pods across namespaces
    - name: get-all-pods
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          command_type: get_pods

    # Get deployment configs
    - name: get-deployment-configs
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          command_type: get_deploymentconfigs

    # Get routes
    - name: get-routes
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          command_type: get_routes

    # Get node pressure conditions
    - name: get-node-pressure
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          command_type: get_node_pressure

    # Get events for a specific namespace (if alert provides namespace)
    - name: get-events
      if: "{{ alert.namespace }}"
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          command_type: get_events
          namespace: "{{ alert.namespace }}"

    # Get pod logs for failing pods (if alert provides pod name)
    - name: get-pod-logs
      if: "{{ alert.pod_name and alert.namespace }}"
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          command_type: get_logs
          namespace: "{{ alert.namespace }}"
          pod_name: "{{ alert.pod_name }}"
          tail_lines: 50

  actions:
    # Report cluster overview
    - name: report-cluster-overview
      provider:
        type: console
        with:
          message: |
            🔍 OpenShift Cluster Overview:
            - Projects: {{ steps.get-projects.results | length }}
            - Total Pods: {{ steps.get-all-pods.results | length }}
            - Deployment Configs: {{ steps.get-deployment-configs.results | length }}
            - Routes: {{ steps.get-routes.results | length }}
            - Node Pressure Issues: {{ steps.get-node-pressure.results | selectattr('conditions', 'ne', []) | list | length }}

    # Alert on failing pods
    - name: alert-failing-pods
      foreach: "{{ steps.get-all-pods.results | selectattr('status.phase', 'ne', 'Running') | selectattr('status.phase', 'ne', 'Succeeded') }}"
      provider:
        type: console
        with:
          message: |
            ⚠️ Pod Issue Detected:
            - Pod: {{ foreach.value.metadata.name }}
            - Namespace: {{ foreach.value.metadata.namespace }}
            - Status: {{ foreach.value.status.phase }}
            - Node: {{ foreach.value.spec.nodeName }}

    # Restart failing pods automatically (CrashLoopBackOff, Failed)
    - name: restart-failed-pods
      foreach: "{{ steps.get-all-pods.results | selectattr('status.phase', 'in', ['CrashLoopBackOff', 'Failed']) }}"
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          action: restart_pod
          namespace: "{{ foreach.value.metadata.namespace }}"
          pod_name: "{{ foreach.value.metadata.name }}"
          message: "Auto-restarting failed pod {{ foreach.value.metadata.name }}"

    # Scale up deployment if alert indicates high load
    - name: scale-deployment-on-high-load
      if: "{{ alert.deployment_name and alert.namespace and alert.scale_up }}"
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          action: scale_deployment
          namespace: "{{ alert.namespace }}"
          deployment_name: "{{ alert.deployment_name }}"
          replicas: "{{ alert.target_replicas | default(3) }}"

    # Scale up deployment config if specified
    - name: scale-deploymentconfig-on-demand
      if: "{{ alert.deploymentconfig_name and alert.namespace and alert.scale_up }}"
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          action: scale_deploymentconfig
          namespace: "{{ alert.namespace }}"
          deploymentconfig_name: "{{ alert.deploymentconfig_name }}"
          replicas: "{{ alert.target_replicas | default(2) }}"

    # Restart deployment on critical alerts
    - name: restart-deployment-on-critical-alert
      if: "{{ alert.severity == 'critical' and alert.deployment_name and alert.namespace }}"
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          action: rollout_restart
          kind: "deployment"
          name: "{{ alert.deployment_name }}"
          namespace: "{{ alert.namespace }}"

    # Restart deployment config on critical alerts
    - name: restart-deploymentconfig-on-critical-alert
      if: "{{ alert.severity == 'critical' and alert.deploymentconfig_name and alert.namespace }}"
      provider:
        type: openshift
        config: "{{ providers.openshift }}"
        with:
          action: rollout_restart
          kind: "deploymentconfig"
          name: "{{ alert.deploymentconfig_name }}"
          namespace: "{{ alert.namespace }}"

    # Send notification with detailed information
    - name: send-notification
      if: "{{ alert }}"
      provider:
        type: slack
        config: "{{ providers.slack }}"
        with:
          message: |
            🚨 OpenShift Alert: {{ alert.name }}
            
            📊 Cluster Status:
            • Projects: {{ steps.get-projects.results | length }}
            • Total Pods: {{ steps.get-all-pods.results | length }}
            • Failing Pods: {{ steps.get-all-pods.results | selectattr('status.phase', 'ne', 'Running') | selectattr('status.phase', 'ne', 'Succeeded') | list | length }}
            
            🔍 Alert Details:
            • Severity: {{ alert.severity }}
            • Source: {{ alert.source }}
            • Namespace: {{ alert.namespace | default('N/A') }}
            • Pod: {{ alert.pod_name | default('N/A') }}
            
            🛠️ Actions Taken:
            {% if alert.deployment_name and alert.scale_up %}• Scaled deployment {{ alert.deployment_name }} to {{ alert.target_replicas | default(3) }} replicas{% endif %}
            {% if alert.deploymentconfig_name and alert.scale_up %}• Scaled DeploymentConfig {{ alert.deploymentconfig_name }} to {{ alert.target_replicas | default(2) }} replicas{% endif %}
            {% if alert.severity == 'critical' and (alert.deployment_name or alert.deploymentconfig_name) %}• Performed rollout restart{% endif %}

# Example alert payloads to test this workflow:

# Manual trigger for cluster overview:
# No additional data needed

# High load scaling scenario:
# {
#   "name": "High CPU Usage",
#   "severity": "warning", 
#   "source": "openshift",
#   "namespace": "production",
#   "deployment_name": "web-app",
#   "scale_up": true,
#   "target_replicas": 5
# }

# Critical pod failure:
# {
#   "name": "Pod CrashLoopBackOff",
#   "severity": "critical",
#   "source": "openshift", 
#   "namespace": "production",
#   "pod_name": "web-app-123-abc",
#   "deployment_name": "web-app"
# }

# DeploymentConfig scaling:
# {
#   "name": "Scale DeploymentConfig",
#   "severity": "warning",
#   "source": "openshift",
#   "namespace": "staging", 
#   "deploymentconfig_name": "api-server",
#   "scale_up": true,
#   "target_replicas": 3
# }