cctools
|
Statistics describing a work queue. More...
#include <work_queue.h>
Data Fields | |
int | workers_connected |
Number of workers currently connected to the manager. More... | |
int | workers_init |
Number of workers connected, but that have not sent their available resources report yet. More... | |
int | workers_idle |
Number of workers that are not running a task. More... | |
int | workers_busy |
Number of workers that are running at least one task. More... | |
int | workers_able |
Number of workers on which the largest task can run. More... | |
int | workers_joined |
Total number of worker connections that were established to the manager. More... | |
int | workers_removed |
Total number of worker connections that were released by the manager, idled-out, fast-aborted, or lost. More... | |
int | workers_released |
Total number of worker connections that were asked by the manager to disconnect. More... | |
int | workers_idled_out |
Total number of workers that disconnected for being idle. More... | |
int | workers_fast_aborted |
Total number of worker connections terminated for being too slow. More... | |
int | workers_blocked |
Total number of workers blocked by the manager. More... | |
int | workers_lost |
Total number of worker connections that were unexpectedly lost. More... | |
int | tasks_waiting |
Number of tasks waiting to be dispatched. More... | |
int | tasks_on_workers |
Number of tasks currently dispatched to some worker. More... | |
int | tasks_running |
Number of tasks currently executing at some worker. More... | |
int | tasks_with_results |
Number of tasks with retrieved results and waiting to be returned to user. More... | |
int | tasks_submitted |
Total number of tasks submitted to the queue. More... | |
int | tasks_dispatched |
Total number of tasks dispatched to workers. More... | |
int | tasks_done |
Total number of tasks completed and returned to user. More... | |
int | tasks_failed |
Total number of tasks completed and returned to user with result other than WQ_RESULT_SUCCESS. More... | |
int | tasks_cancelled |
Total number of tasks cancelled. More... | |
int | tasks_exhausted_attempts |
Total number of task executions that failed given resource exhaustion. More... | |
timestamp_t | time_when_started |
Absolute time at which the manager started. More... | |
timestamp_t | time_send |
Total time spent in sending tasks to workers (task descriptions and input files). More... | |
timestamp_t | time_receive |
Total time spent in receiving results from workers (output files). More... | |
timestamp_t | time_send_good |
Total time spent in sending data to workers for tasks with result WQ_RESULT_SUCCESS. More... | |
timestamp_t | time_receive_good |
Total time spent in receiving data from workers for tasks with result WQ_RESULT_SUCCESS. More... | |
timestamp_t | time_status_msgs |
Total time spent sending and receiving status messages to and from workers, including workers' standard output, new worker connections, resource updates, etc. More... | |
timestamp_t | time_internal |
Total time the queue spends in internal processing. More... | |
timestamp_t | time_polling |
Total time blocking waiting for worker communications (i.e., manager idle waiting for a worker message). More... | |
timestamp_t | time_application |
Total time spent outside work_queue_wait. More... | |
timestamp_t | time_workers_execute |
Total time workers spent executing done tasks. More... | |
timestamp_t | time_workers_execute_good |
Total time workers spent executing done tasks with result WQ_RESULT_SUCCESS. More... | |
timestamp_t | time_workers_execute_exhaustion |
Total time workers spent executing tasks that exhausted resources. More... | |
int64_t | bytes_sent |
Total number of file bytes (not including protocol control msg bytes) sent out to the workers by the manager. More... | |
int64_t | bytes_received |
Total number of file bytes (not including protocol control msg bytes) received from the workers by the manager. More... | |
double | bandwidth |
Average network bandwidth in MB/s observed by the manager when transferring to workers. More... | |
int | capacity_tasks |
The estimated number of tasks that this manager can effectively support. More... | |
int | capacity_cores |
The estimated number of workers' cores that this manager can effectively support. More... | |
int | capacity_memory |
The estimated number of workers' MB of RAM that this manager can effectively support. More... | |
int | capacity_disk |
The estimated number of workers' MB of disk that this manager can effectively support. More... | |
int | capacity_gpus |
The estimated number of workers' GPUs that this manager can effectively support. More... | |
int | capacity_instantaneous |
The estimated number of tasks that this manager can support considering only the most recently completed task. More... | |
int | capacity_weighted |
The estimated number of tasks that this manager can support placing greater weight on the most recently completed task. More... | |
int64_t | total_cores |
Total number of cores aggregated across the connected workers. More... | |
int64_t | total_memory |
Total memory in MB aggregated across the connected workers. More... | |
int64_t | total_disk |
Total disk space in MB aggregated across the connected workers. More... | |
int64_t | committed_cores |
Committed number of cores aggregated across the connected workers. More... | |
int64_t | committed_memory |
Committed memory in MB aggregated across the connected workers. More... | |
int64_t | committed_disk |
Committed disk space in MB aggregated across the connected workers. More... | |
int64_t | max_cores |
The highest number of cores observed among the connected workers. More... | |
int64_t | max_memory |
The largest memory size in MB observed among the connected workers. More... | |
int64_t | max_disk |
The largest disk space in MB observed among the connected workers. More... | |
int64_t | min_cores |
The lowest number of cores observed among the connected workers. More... | |
int64_t | min_memory |
The smallest memory size in MB observed among the connected workers. More... | |
int64_t | min_disk |
The smallest disk space in MB observed among the connected workers. More... | |
double | manager_load |
In the range of [0,1]. More... | |
int | total_workers_connected |
int | total_workers_joined |
int | total_workers_removed |
int | total_workers_lost |
int | total_workers_idled_out |
int | total_workers_fast_aborted |
int | tasks_complete |
int | total_tasks_dispatched |
int | total_tasks_complete |
int | total_tasks_failed |
int | total_tasks_cancelled |
int | total_exhausted_attempts |
timestamp_t | start_time |
timestamp_t | total_send_time |
timestamp_t | total_receive_time |
timestamp_t | total_good_transfer_time |
timestamp_t | total_execute_time |
timestamp_t | total_good_execute_time |
timestamp_t | total_exhausted_execute_time |
int64_t | total_bytes_sent |
int64_t | total_bytes_received |
double | capacity |
double | efficiency |
double | idle_percentage |
int64_t | total_gpus |
int64_t | committed_gpus |
int64_t | max_gpus |
int64_t | min_gpus |
int | port |
int | priority |
int | workers_ready |
int | workers_full |
int | total_worker_slots |
int | avg_capacity |
int | workers_blacklisted |
Statistics describing a work queue.
int work_queue_stats::workers_connected |
Number of workers currently connected to the manager.
int work_queue_stats::workers_init |
Number of workers connected, but that have not sent their available resources report yet.
int work_queue_stats::workers_idle |
Number of workers that are not running a task.
int work_queue_stats::workers_busy |
Number of workers that are running at least one task.
int work_queue_stats::workers_able |
Number of workers on which the largest task can run.
int work_queue_stats::workers_joined |
Total number of worker connections that were established to the manager.
int work_queue_stats::workers_removed |
Total number of worker connections that were released by the manager, idled-out, fast-aborted, or lost.
int work_queue_stats::workers_released |
Total number of worker connections that were asked by the manager to disconnect.
int work_queue_stats::workers_idled_out |
Total number of workers that disconnected for being idle.
int work_queue_stats::workers_fast_aborted |
Total number of worker connections terminated for being too slow.
int work_queue_stats::workers_blocked |
Total number of workers blocked by the manager.
(Includes workers_fast_aborted.)
int work_queue_stats::workers_lost |
Total number of worker connections that were unexpectedly lost.
(does not include idled-out or fast-aborted)
int work_queue_stats::tasks_waiting |
Number of tasks waiting to be dispatched.
int work_queue_stats::tasks_on_workers |
Number of tasks currently dispatched to some worker.
int work_queue_stats::tasks_running |
Number of tasks currently executing at some worker.
int work_queue_stats::tasks_with_results |
Number of tasks with retrieved results and waiting to be returned to user.
int work_queue_stats::tasks_submitted |
Total number of tasks submitted to the queue.
int work_queue_stats::tasks_dispatched |
Total number of tasks dispatched to workers.
int work_queue_stats::tasks_done |
Total number of tasks completed and returned to user.
(includes tasks_failed)
int work_queue_stats::tasks_failed |
Total number of tasks completed and returned to user with result other than WQ_RESULT_SUCCESS.
int work_queue_stats::tasks_cancelled |
Total number of tasks cancelled.
int work_queue_stats::tasks_exhausted_attempts |
Total number of task executions that failed given resource exhaustion.
timestamp_t work_queue_stats::time_when_started |
Absolute time at which the manager started.
timestamp_t work_queue_stats::time_send |
Total time spent in sending tasks to workers (task descriptions and input files).
timestamp_t work_queue_stats::time_receive |
Total time spent in receiving results from workers (output files).
timestamp_t work_queue_stats::time_send_good |
Total time spent in sending data to workers for tasks with result WQ_RESULT_SUCCESS.
timestamp_t work_queue_stats::time_receive_good |
Total time spent in receiving data from workers for tasks with result WQ_RESULT_SUCCESS.
timestamp_t work_queue_stats::time_status_msgs |
Total time spent sending and receiving status messages to and from workers, including workers' standard output, new worker connections, resource updates, etc.
timestamp_t work_queue_stats::time_internal |
Total time the queue spends in internal processing.
timestamp_t work_queue_stats::time_polling |
Total time blocking waiting for worker communications (i.e., manager idle waiting for a worker message).
timestamp_t work_queue_stats::time_application |
Total time spent outside work_queue_wait.
timestamp_t work_queue_stats::time_workers_execute |
Total time workers spent executing done tasks.
timestamp_t work_queue_stats::time_workers_execute_good |
Total time workers spent executing done tasks with result WQ_RESULT_SUCCESS.
timestamp_t work_queue_stats::time_workers_execute_exhaustion |
Total time workers spent executing tasks that exhausted resources.
int64_t work_queue_stats::bytes_sent |
Total number of file bytes (not including protocol control msg bytes) sent out to the workers by the manager.
int64_t work_queue_stats::bytes_received |
Total number of file bytes (not including protocol control msg bytes) received from the workers by the manager.
double work_queue_stats::bandwidth |
Average network bandwidth in MB/s observed by the manager when transferring to workers.
int work_queue_stats::capacity_tasks |
The estimated number of tasks that this manager can effectively support.
int work_queue_stats::capacity_cores |
The estimated number of workers' cores that this manager can effectively support.
int work_queue_stats::capacity_memory |
The estimated number of workers' MB of RAM that this manager can effectively support.
int work_queue_stats::capacity_disk |
The estimated number of workers' MB of disk that this manager can effectively support.
int work_queue_stats::capacity_gpus |
The estimated number of workers' GPUs that this manager can effectively support.
int work_queue_stats::capacity_instantaneous |
The estimated number of tasks that this manager can support considering only the most recently completed task.
int work_queue_stats::capacity_weighted |
The estimated number of tasks that this manager can support placing greater weight on the most recently completed task.
int64_t work_queue_stats::total_cores |
Total number of cores aggregated across the connected workers.
int64_t work_queue_stats::total_memory |
Total memory in MB aggregated across the connected workers.
int64_t work_queue_stats::total_disk |
Total disk space in MB aggregated across the connected workers.
int64_t work_queue_stats::committed_cores |
Committed number of cores aggregated across the connected workers.
int64_t work_queue_stats::committed_memory |
Committed memory in MB aggregated across the connected workers.
int64_t work_queue_stats::committed_disk |
Committed disk space in MB aggregated across the connected workers.
int64_t work_queue_stats::max_cores |
The highest number of cores observed among the connected workers.
int64_t work_queue_stats::max_memory |
The largest memory size in MB observed among the connected workers.
int64_t work_queue_stats::max_disk |
The largest disk space in MB observed among the connected workers.
int64_t work_queue_stats::min_cores |
The lowest number of cores observed among the connected workers.
int64_t work_queue_stats::min_memory |
The smallest memory size in MB observed among the connected workers.
int64_t work_queue_stats::min_disk |
The smallest disk space in MB observed among the connected workers.
double work_queue_stats::manager_load |
In the range of [0,1].
If close to 1, then the manager is at full load and spends most of its time sending and receiving tasks, and thus cannot accept connections from new workers. If close to 0, the manager is spending most of its time waiting for something to happen.
Deprecated fields:
int work_queue_stats::total_workers_connected |
int work_queue_stats::total_workers_joined |
int work_queue_stats::total_workers_removed |
int work_queue_stats::total_workers_lost |
int work_queue_stats::total_workers_idled_out |
int work_queue_stats::total_workers_fast_aborted |
int work_queue_stats::tasks_complete |
int work_queue_stats::total_tasks_dispatched |
int work_queue_stats::total_tasks_complete |
int work_queue_stats::total_tasks_failed |
int work_queue_stats::total_tasks_cancelled |
int work_queue_stats::total_exhausted_attempts |
timestamp_t work_queue_stats::start_time |
timestamp_t work_queue_stats::total_send_time |
timestamp_t work_queue_stats::total_receive_time |
timestamp_t work_queue_stats::total_good_transfer_time |
timestamp_t work_queue_stats::total_execute_time |
timestamp_t work_queue_stats::total_good_execute_time |
timestamp_t work_queue_stats::total_exhausted_execute_time |
int64_t work_queue_stats::total_bytes_sent |
int64_t work_queue_stats::total_bytes_received |
double work_queue_stats::capacity |
double work_queue_stats::efficiency |
double work_queue_stats::idle_percentage |
int64_t work_queue_stats::total_gpus |
int64_t work_queue_stats::committed_gpus |
int64_t work_queue_stats::max_gpus |
int64_t work_queue_stats::min_gpus |
int work_queue_stats::port |
int work_queue_stats::priority |
int work_queue_stats::workers_ready |
int work_queue_stats::workers_full |
int work_queue_stats::total_worker_slots |
int work_queue_stats::avg_capacity |
int work_queue_stats::workers_blacklisted |