2 * Copyright 2015 Samuel Pitoiset
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_query_hw_metric.h"
25 #include "nvc0/nvc0_query_hw_sm.h"
/*
 * Table describing every metric query handled by this file.
 *
 * Each _Q(i, n, t) entry expands to
 *    { NVC0_HW_METRIC_QUERY_<i>, <n>, PIPE_DRIVER_QUERY_TYPE_<t> }
 * i.e. the metric id, its name string and the type of its result.
 */
27 #define _Q(i,n,t) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t }
28 struct nvc0_hw_metric_cfg {
/*
 * NOTE(review): the _Q() initializer and the users of this struct (.id in
 * nvc0_hw_metric_get_cfg(), ->name and ->type in the driver query info code)
 * imply two members ahead of "type"; their declarations are not visible in
 * this view.
 */
31 enum pipe_driver_query_type type;
32 } nvc0_hw_metric_queries[] = {
33 _Q(ACHIEVED_OCCUPANCY, "metric-achieved_occupancy", PERCENTAGE ),
34 _Q(BRANCH_EFFICIENCY, "metric-branch_efficiency", PERCENTAGE ),
35 _Q(INST_ISSUED, "metric-inst_issued", UINT64 ),
36 _Q(INST_PER_WRAP, "metric-inst_per_wrap", UINT64 ),
37 _Q(INST_REPLAY_OVERHEAD, "metric-inst_replay_overhead", UINT64 ),
38 _Q(ISSUED_IPC, "metric-issued_ipc", UINT64 ),
39 _Q(ISSUE_SLOTS, "metric-issue_slots", UINT64 ),
40 _Q(ISSUE_SLOT_UTILIZATION, "metric-issue_slot_utilization", PERCENTAGE ),
41 _Q(IPC, "metric-ipc", UINT64 ),
42 _Q(SHARED_REPLAY_OVERHEAD, "metric-shared_replay_overhead", UINT64 ),
/*
 * Find the nvc0_hw_metric_queries entry whose id equals metric_id.
 *
 * The table is scanned from the first entry and a pointer to the first match
 * is returned. What happens when no entry matches is not visible in this
 * view.
 */
47 static inline const struct nvc0_hw_metric_cfg *
48 nvc0_hw_metric_get_cfg(unsigned metric_id)
52 for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) {
53 if (nvc0_hw_metric_queries[i].id == metric_id)
54 return &nvc0_hw_metric_queries[i];
/*
 * Description of one metric for one compute capability.
 *
 * The member declarations are not visible in this view. The initializers
 * below set .type (a NVC0_HW_METRIC_QUERY_* id) and .queries[n] (the
 * NVC0_HW_SM_QUERY ids later handed to nvc0_hw_sm_create_query()), and
 * nvc0_hw_metric_create_query() reads .num_queries.
 */
60 struct nvc0_hw_metric_query_cfg {
/* Shorthand: _SM(X) expands to NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_X). */
66 #define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
/*
 * Metric definitions for this compute capability.
 *
 * The order of .queries[] is significant: the result of .queries[i] ends up
 * in res64[i] when the metric is evaluated by the *_hw_metric_calc_result()
 * functions.
 */
68 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
/* achieved_occupancy: [0] = ACTIVE_WARPS, [1] = ACTIVE_CYCLES. */
69 static const struct nvc0_hw_metric_query_cfg
70 sm20_achieved_occupancy =
72 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
73 .queries[0] = _SM(ACTIVE_WARPS),
74 .queries[1] = _SM(ACTIVE_CYCLES),
/* branch_efficiency: [0] = BRANCH, [1] = DIVERGENT_BRANCH. */
78 static const struct nvc0_hw_metric_query_cfg
79 sm20_branch_efficiency =
81 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
82 .queries[0] = _SM(BRANCH),
83 .queries[1] = _SM(DIVERGENT_BRANCH),
/* inst_per_wrap: [0] = INST_EXECUTED, [1] = WARPS_LAUNCHED. */
87 static const struct nvc0_hw_metric_query_cfg
90 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
91 .queries[0] = _SM(INST_EXECUTED),
92 .queries[1] = _SM(WARPS_LAUNCHED),
/* inst_replay_overhead: [0] = INST_ISSUED, [1] = INST_EXECUTED. */
96 static const struct nvc0_hw_metric_query_cfg
97 sm20_inst_replay_overhead =
99 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
100 .queries[0] = _SM(INST_ISSUED),
101 .queries[1] = _SM(INST_EXECUTED),
/* issued_ipc: [0] = INST_ISSUED, [1] = ACTIVE_CYCLES. */
105 static const struct nvc0_hw_metric_query_cfg
108 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
109 .queries[0] = _SM(INST_ISSUED),
110 .queries[1] = _SM(ACTIVE_CYCLES),
/* issue_slot_utilization: [0] = INST_ISSUED, [1] = ACTIVE_CYCLES. */
114 static const struct nvc0_hw_metric_query_cfg
115 sm20_issue_slot_utilization =
117 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
118 .queries[0] = _SM(INST_ISSUED),
119 .queries[1] = _SM(ACTIVE_CYCLES),
/* ipc: [0] = INST_EXECUTED, [1] = ACTIVE_CYCLES. */
123 static const struct nvc0_hw_metric_query_cfg
126 .type = NVC0_HW_METRIC_QUERY_IPC,
127 .queries[0] = _SM(INST_EXECUTED),
128 .queries[1] = _SM(ACTIVE_CYCLES),
/*
 * List of the metric definitions selected for the SM20 case by
 * nvc0_hw_metric_get_queries(). Only some of the entries are visible in this
 * view.
 */
132 static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
134 &sm20_achieved_occupancy,
135 &sm20_branch_efficiency,
137 &sm20_inst_replay_overhead,
139 &sm20_issue_slot_utilization,
/*
 * Metric definitions for this compute capability. Unlike the SM20
 * definitions, which read a single INST_ISSUED query, these read the four
 * INST_ISSUED1_0/1_1/2_0/2_1 queries.
 *
 * The order of .queries[] is significant: the result of .queries[i] ends up
 * in res64[i] when the metric is evaluated.
 */
143 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
/* inst_issued: [0..3] = INST_ISSUED1_0, 1_1, 2_0, 2_1. */
144 static const struct nvc0_hw_metric_query_cfg
147 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
148 .queries[0] = _SM(INST_ISSUED1_0),
149 .queries[1] = _SM(INST_ISSUED1_1),
150 .queries[2] = _SM(INST_ISSUED2_0),
151 .queries[3] = _SM(INST_ISSUED2_1),
/* inst_replay_overhead: [0..3] = issue counters, [4] = INST_EXECUTED. */
155 static const struct nvc0_hw_metric_query_cfg
156 sm21_inst_replay_overhead =
158 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
159 .queries[0] = _SM(INST_ISSUED1_0),
160 .queries[1] = _SM(INST_ISSUED1_1),
161 .queries[2] = _SM(INST_ISSUED2_0),
162 .queries[3] = _SM(INST_ISSUED2_1),
163 .queries[4] = _SM(INST_EXECUTED),
/* issued_ipc: [0..3] = issue counters, [4] = ACTIVE_CYCLES. */
167 static const struct nvc0_hw_metric_query_cfg
170 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
171 .queries[0] = _SM(INST_ISSUED1_0),
172 .queries[1] = _SM(INST_ISSUED1_1),
173 .queries[2] = _SM(INST_ISSUED2_0),
174 .queries[3] = _SM(INST_ISSUED2_1),
175 .queries[4] = _SM(ACTIVE_CYCLES),
/* issue_slots: [0..3] = issue counters. */
179 static const struct nvc0_hw_metric_query_cfg
182 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
183 .queries[0] = _SM(INST_ISSUED1_0),
184 .queries[1] = _SM(INST_ISSUED1_1),
185 .queries[2] = _SM(INST_ISSUED2_0),
186 .queries[3] = _SM(INST_ISSUED2_1),
/* issue_slot_utilization: [0..3] = issue counters, [4] = ACTIVE_CYCLES. */
190 static const struct nvc0_hw_metric_query_cfg
191 sm21_issue_slot_utilization =
193 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
194 .queries[0] = _SM(INST_ISSUED1_0),
195 .queries[1] = _SM(INST_ISSUED1_1),
196 .queries[2] = _SM(INST_ISSUED2_0),
197 .queries[3] = _SM(INST_ISSUED2_1),
198 .queries[4] = _SM(ACTIVE_CYCLES),
/*
 * List of the metric definitions selected for the SM21 case by
 * nvc0_hw_metric_get_queries(). The achieved_occupancy and branch_efficiency
 * entries reuse the SM20 definitions. Only some of the entries are visible in
 * this view.
 */
202 static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
204 &sm20_achieved_occupancy,
205 &sm20_branch_efficiency,
208 &sm21_inst_replay_overhead,
211 &sm21_issue_slot_utilization,
/*
 * Metric definitions for this compute capability. Issue counts are read from
 * the INST_ISSUED1 and INST_ISSUED2 queries.
 *
 * The order of .queries[] is significant: the result of .queries[i] ends up
 * in res64[i] when the metric is evaluated.
 */
215 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
/* achieved_occupancy: [0] = ACTIVE_WARPS, [1] = ACTIVE_CYCLES. */
216 static const struct nvc0_hw_metric_query_cfg
217 sm30_achieved_occupancy =
219 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
220 .queries[0] = _SM(ACTIVE_WARPS),
221 .queries[1] = _SM(ACTIVE_CYCLES),
/* branch_efficiency: [0] = BRANCH, [1] = DIVERGENT_BRANCH. */
225 static const struct nvc0_hw_metric_query_cfg
226 sm30_branch_efficiency =
228 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
229 .queries[0] = _SM(BRANCH),
230 .queries[1] = _SM(DIVERGENT_BRANCH),
/* inst_issued: [0] = INST_ISSUED1, [1] = INST_ISSUED2. */
234 static const struct nvc0_hw_metric_query_cfg
237 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
238 .queries[0] = _SM(INST_ISSUED1),
239 .queries[1] = _SM(INST_ISSUED2),
/* inst_per_wrap: [0] = INST_EXECUTED, [1] = WARPS_LAUNCHED. */
243 static const struct nvc0_hw_metric_query_cfg
246 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
247 .queries[0] = _SM(INST_EXECUTED),
248 .queries[1] = _SM(WARPS_LAUNCHED),
/* inst_replay_overhead: [0], [1] = issue counters, [2] = INST_EXECUTED. */
252 static const struct nvc0_hw_metric_query_cfg
253 sm30_inst_replay_overhead =
255 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
256 .queries[0] = _SM(INST_ISSUED1),
257 .queries[1] = _SM(INST_ISSUED2),
258 .queries[2] = _SM(INST_EXECUTED),
/* issued_ipc: [0], [1] = issue counters, [2] = ACTIVE_CYCLES. */
262 static const struct nvc0_hw_metric_query_cfg
265 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
266 .queries[0] = _SM(INST_ISSUED1),
267 .queries[1] = _SM(INST_ISSUED2),
268 .queries[2] = _SM(ACTIVE_CYCLES),
/* issue_slots: [0] = INST_ISSUED1, [1] = INST_ISSUED2. */
272 static const struct nvc0_hw_metric_query_cfg
275 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
276 .queries[0] = _SM(INST_ISSUED1),
277 .queries[1] = _SM(INST_ISSUED2),
/* issue_slot_utilization: [0], [1] = issue counters, [2] = ACTIVE_CYCLES. */
281 static const struct nvc0_hw_metric_query_cfg
282 sm30_issue_slot_utilization =
284 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
285 .queries[0] = _SM(INST_ISSUED1),
286 .queries[1] = _SM(INST_ISSUED2),
287 .queries[2] = _SM(ACTIVE_CYCLES),
/* ipc: [0] = INST_EXECUTED, [1] = ACTIVE_CYCLES. */
291 static const struct nvc0_hw_metric_query_cfg
294 .type = NVC0_HW_METRIC_QUERY_IPC,
295 .queries[0] = _SM(INST_EXECUTED),
296 .queries[1] = _SM(ACTIVE_CYCLES),
/*
 * shared_replay_overhead: [0] = SHARED_LD_REPLAY, [1] = SHARED_ST_REPLAY,
 * [2] = INST_EXECUTED.
 */
300 static const struct nvc0_hw_metric_query_cfg
301 sm30_shared_replay_overhead =
303 .type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
304 .queries[0] = _SM(SHARED_LD_REPLAY),
305 .queries[1] = _SM(SHARED_ST_REPLAY),
306 .queries[2] = _SM(INST_EXECUTED),
/*
 * List of the metric definitions selected for the SM30 case by
 * nvc0_hw_metric_get_queries(). Only some of the entries are visible in this
 * view.
 */
310 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
312 &sm30_achieved_occupancy,
313 &sm30_branch_efficiency,
316 &sm30_inst_replay_overhead,
319 &sm30_issue_slot_utilization,
321 &sm30_shared_replay_overhead,
324 /* ==== Compute capability 3.5 (GK110) ==== */
/*
 * List of the metric definitions selected for the SM35 case by
 * nvc0_hw_metric_get_queries(). The visible entries all reuse the SM30
 * definitions; the remaining entries are not visible in this view.
 */
325 static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
327 &sm30_achieved_occupancy,
330 &sm30_inst_replay_overhead,
333 &sm30_issue_slot_utilization,
335 &sm30_shared_replay_overhead,
/*
 * Return the metric definition list matching the given screen.
 *
 * The selection switches on screen->base.class_3d (the case labels are not
 * visible in this view): one path returns the SM35 list and one the SM30
 * list. On the remaining path the chipset decides: 0xc0 and 0xc8 get the
 * SM20 list, everything else the SM21 list.
 *
 * nvc0_hw_metric_get_num_queries() performs the same selection to report the
 * length of the returned list.
 */
340 static inline const struct nvc0_hw_metric_query_cfg **
341 nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
343 struct nouveau_device *dev = screen->base.device;
345 switch (screen->base.class_3d) {
347 return sm35_hw_metric_queries;
349 return sm30_hw_metric_queries;
351 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
352 return sm20_hw_metric_queries;
353 return sm21_hw_metric_queries;
/*
 * Return the number of entries in the metric definition list that
 * nvc0_hw_metric_get_queries() selects for the given screen.
 *
 * The selection logic (screen->base.class_3d, then chipset 0xc0/0xc8 versus
 * the rest) has the same shape as in nvc0_hw_metric_get_queries(); the two
 * functions are used as a pair and have to stay in sync.
 */
360 nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)
362 struct nouveau_device *dev = screen->base.device;
364 switch (screen->base.class_3d) {
366 return ARRAY_SIZE(sm35_hw_metric_queries);
368 return ARRAY_SIZE(sm30_hw_metric_queries);
370 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
371 return ARRAY_SIZE(sm20_hw_metric_queries);
372 return ARRAY_SIZE(sm21_hw_metric_queries);
/*
 * Find the metric definition that corresponds to the query hq on the screen
 * of context nvc0.
 *
 * The screen's definition list is scanned for an entry whose type, wrapped
 * in NVC0_HW_METRIC_QUERY(), equals the query's type. The statements executed
 * on a match and after an unsuccessful scan are not visible in this view.
 */
377 static const struct nvc0_hw_metric_query_cfg *
378 nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
380 const struct nvc0_hw_metric_query_cfg **queries;
381 struct nvc0_screen *screen = nvc0->screen;
382 struct nvc0_query *q = &hq->base;
383 unsigned num_queries;
/* The list and its length are fetched separately but for the same screen. */
386 num_queries = nvc0_hw_metric_get_num_queries(screen);
387 queries = nvc0_hw_metric_get_queries(screen);
389 for (i = 0; i < num_queries; i++) {
390 if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)
/*
 * Destroy the sub-queries of a metric query.
 *
 * For each of the hmq->num_queries sub-queries the destroy_query callback is
 * invoked, if the sub-query provides one. Any further cleanup of the metric
 * query itself is not visible in this view.
 */
398 nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
399 struct nvc0_hw_query *hq)
401 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
404 for (i = 0; i < hmq->num_queries; i++)
405 if (hmq->queries[i]->funcs->destroy_query)
406 hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
/*
 * Begin a metric query by calling begin_query on each of its sub-queries, in
 * index order.
 *
 * The result of each call is stored in ret; how ret is checked and what the
 * function returns is not visible in this view.
 */
411 nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
413 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
417 for (i = 0; i < hmq->num_queries; i++) {
418 ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
/*
 * End a metric query by calling end_query on each of its sub-queries, in
 * index order.
 */
426 nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
428 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
431 for (i = 0; i < hmq->num_queries; i++)
432 hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
/*
 * Compute the value of a metric from the results of its sub-queries, using
 * the SM20 formulas.
 *
 * hq    - the metric query; its type, relative to NVC0_HW_METRIC_QUERY(0),
 *         selects the formula.
 * res64 - sub-query results, indexed like .queries[] of the matching
 *         sm20_* metric definition.
 *
 * NOTE(review): several branches produce a double; whether the fractional
 * part survives depends on the return type, which is not visible in this
 * view. Any guards against a zero divisor are likewise not visible, except
 * for the one in the branch_efficiency case.
 */
436 sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
438 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
439 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
440 /* (active_warps / active_cycles) / max. number of warps on a MP */
442 return (res64[0] / (double)res64[1]) / 48;
444 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
445 /* (branch / (branch + divergent_branch)) * 100 */
446 if (res64[0] + res64[1])
447 return (res64[0] / (double)(res64[0] + res64[1])) * 100;
449 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
450 /* inst_executed / warps_launched */
452 return res64[0] / (double)res64[1];
454 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
455 /* (inst_issued - inst_executed) / inst_executed */
/*
 * NOTE(review): res64[] is unsigned, so the subtraction wraps around if
 * res64[0] is smaller than res64[1].
 */
457 return (res64[0] - res64[1]) / (double)res64[1];
459 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
460 /* inst_issued / active_cycles */
462 return res64[0] / (double)res64[1];
464 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
465 /* ((inst_issued / 2) / active_cycles) * 100 */
/*
 * NOTE(review): res64[0] / 2 is an integer division; the remainder is
 * dropped before the conversion to double.
 */
467 return ((res64[0] / 2) / (double)res64[1]) * 100;
469 case NVC0_HW_METRIC_QUERY_IPC:
470 /* inst_executed / active_cycles */
472 return res64[0] / (double)res64[1];
/* Diagnostic for a metric type that none of the cases above handles. */
475 debug_printf("invalid metric type: %d\n",
476 hq->base.type - NVC0_HW_METRIC_QUERY(0));
/*
 * Compute the value of a metric from the results of its sub-queries, using
 * the SM21 formulas.
 *
 * hq    - the metric query; its type, relative to NVC0_HW_METRIC_QUERY(0),
 *         selects the formula.
 * res64 - sub-query results, indexed like .queries[] of the matching metric
 *         definition.
 *
 * achieved_occupancy, branch_efficiency, inst_per_wrap and ipc are forwarded
 * to sm20_hw_metric_calc_result(). The other metrics are built from the four
 * issue counters in res64[0..3]; the two counters in res64[2] and res64[3]
 * are weighted by two when instructions, rather than issue slots, are
 * counted.
 *
 * NOTE(review): the return type and any guards against a zero divisor are
 * not visible in this view.
 */
483 sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
485 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
486 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
487 return sm20_hw_metric_calc_result(hq, res64);
488 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
489 return sm20_hw_metric_calc_result(hq, res64);
490 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
491 /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
492 return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
494 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
495 return sm20_hw_metric_calc_result(hq, res64);
496 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
497 /* (metric-inst_issued - inst_executed) / inst_executed */
/*
 * NOTE(review): the operands are unsigned, so the subtraction wraps around
 * if the issued total is smaller than res64[4].
 */
499 return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
500 res64[4]) / (double)res64[4]);
502 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
503 /* metric-inst_issued / active_cycles */
505 return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
508 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
509 /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
510 return res64[0] + res64[1] + res64[2] + res64[3];
512 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
513 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
/*
 * NOTE(review): the division by 2 is an integer division; the remainder is
 * dropped before the conversion to double.
 */
515 return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
516 (double)res64[4]) * 100;
518 case NVC0_HW_METRIC_QUERY_IPC:
519 return sm20_hw_metric_calc_result(hq, res64);
/* Diagnostic for a metric type that none of the cases above handles. */
521 debug_printf("invalid metric type: %d\n",
522 hq->base.type - NVC0_HW_METRIC_QUERY(0));
/*
 * Compute the value of a metric from the results of its sub-queries, using
 * the SM30 formulas.
 *
 * hq    - the metric query; its type, relative to NVC0_HW_METRIC_QUERY(0),
 *         selects the formula.
 * res64 - sub-query results, indexed like .queries[] of the matching sm30_*
 *         metric definition.
 *
 * branch_efficiency, inst_per_wrap and ipc are forwarded to
 * sm20_hw_metric_calc_result(). achieved_occupancy uses the same formula as
 * SM20 but divides by 64 instead of 48.
 *
 * NOTE(review): the return type and any guards against a zero divisor are
 * not visible in this view.
 */
529 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
531 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
532 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
533 /* (active_warps / active_cycles) / max. number of warps on a MP */
535 return (res64[0] / (double)res64[1]) / 64;
537 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
538 return sm20_hw_metric_calc_result(hq, res64);
539 case NVC0_HW_METRIC_QUERY_INST_ISSUED:
540 /* inst_issued1 + inst_issued2 * 2 */
541 return res64[0] + res64[1] * 2;
542 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
543 return sm20_hw_metric_calc_result(hq, res64);
544 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
545 /* (metric-inst_issued - inst_executed) / inst_executed */
/*
 * NOTE(review): the operands are unsigned, so the subtraction wraps around
 * if the issued total is smaller than res64[2].
 */
547 return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
549 case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
550 /* metric-inst_issued / active_cycles */
552 return (res64[0] + res64[1] * 2) / (double)res64[2];
554 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
555 /* inst_issued1 + inst_issued2 */
556 return res64[0] + res64[1];
557 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
558 /* ((metric-issue_slots / 2) / active_cycles) * 100 */
/*
 * NOTE(review): the division by 2 is an integer division; the remainder is
 * dropped before the conversion to double.
 */
560 return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
562 case NVC0_HW_METRIC_QUERY_IPC:
563 return sm20_hw_metric_calc_result(hq, res64);
564 case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
565 /* (shared_load_replay + shared_store_replay) / inst_executed */
567 return (res64[0] + res64[1]) / (double)res64[2];
/* Diagnostic for a metric type that none of the cases above handles. */
570 debug_printf("invalid metric type: %d\n",
571 hq->base.type - NVC0_HW_METRIC_QUERY(0));
/*
 * Fetch the results of all sub-queries of a metric query and combine them
 * into the metric value.
 *
 * Each sub-query result is read into results[i] and its leading 64 bits are
 * copied to res64[i]. The combination function is then chosen with the same
 * criteria as the metric definition lists: screen->base.class_3d first (the
 * case labels are not visible in this view), then chipset 0xc0/0xc8 (SM20)
 * versus the rest (SM21). The computed value is stored into *result as a
 * 64-bit unsigned integer.
 *
 * NOTE(review): the declaration of value, the handling of ret and the return
 * statement are not visible in this view.
 */
578 nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
579 struct nvc0_hw_query *hq, boolean wait,
580 union pipe_query_result *result)
582 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
583 struct nvc0_screen *screen = nvc0->screen;
584 struct nouveau_device *dev = screen->base.device;
/* Both arrays start zeroed, so unused slots read as 0. */
585 union pipe_query_result results[8] = {};
586 uint64_t res64[8] = {};
591 for (i = 0; i < hmq->num_queries; i++) {
592 ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
/* Reinterpret the start of the result union as a 64-bit counter value. */
596 res64[i] = *(uint64_t *)&results[i];
599 switch (screen->base.class_3d) {
602 value = sm30_hw_metric_calc_result(hq, res64);
605 if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
606 value = sm20_hw_metric_calc_result(hq, res64);
608 value = sm21_hw_metric_calc_result(hq, res64);
612 *(uint64_t *)result = value;
/*
 * Callback table installed on every metric query by
 * nvc0_hw_metric_create_query().
 */
616 static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
617 .destroy_query = nvc0_hw_metric_destroy_query,
618 .begin_query = nvc0_hw_metric_begin_query,
619 .end_query = nvc0_hw_metric_end_query,
620 .get_query_result = nvc0_hw_metric_get_query_result,
/*
 * Create a metric query of the given type.
 *
 * Types outside the range NVC0_HW_METRIC_QUERY(0) .. NVC0_HW_METRIC_QUERY_LAST
 * are filtered out first (the statement taken in that case is not visible in
 * this view). Otherwise a nvc0_hw_metric_query is allocated, the metric
 * callback table and the type are installed, and one SM sub-query is created
 * for each entry of the matching metric definition. If a sub-query cannot be
 * created, the metric query is torn down with nvc0_hw_metric_destroy_query().
 *
 * NOTE(review): the allocation check, the assignment of hq, the bookkeeping
 * of hmq->num_queries and the return statements are not visible in this
 * view.
 */
623 struct nvc0_hw_query *
624 nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
626 const struct nvc0_hw_metric_query_cfg *cfg;
627 struct nvc0_hw_metric_query *hmq;
628 struct nvc0_hw_query *hq;
631 if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
634 hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
639 hq->funcs = &hw_metric_query_funcs;
640 hq->base.type = type;
/* The lookup relies on hq->base.type having been set just above. */
642 cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
644 for (i = 0; i < cfg->num_queries; i++) {
645 hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
646 if (!hmq->queries[i]) {
647 nvc0_hw_metric_destroy_query(nvc0, hq);
/*
 * Describe metric number id of the given screen in *info.
 *
 * The number of available metrics is taken from
 * nvc0_hw_metric_get_num_queries() when the DRM version is at least
 * 0x01000101. When screen->compute is set and the 3D class is at most
 * NVF0_3D_CLASS, info receives the name and result type from the global
 * metric table, the query type NVC0_HW_METRIC_QUERY(<metric id>) and the
 * group id NVC0_HW_METRIC_QUERY_GROUP.
 *
 * NOTE(review): the range check on id, the handling of a NULL info and the
 * return values are not visible in this view; the function may also continue
 * past the last visible line.
 */
657 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
658 struct pipe_driver_query_info *info)
662 if (screen->base.drm->version >= 0x01000101) {
664 count = nvc0_hw_metric_get_num_queries(screen);
671 if (screen->compute) {
672 if (screen->base.class_3d <= NVF0_3D_CLASS) {
/* id indexes the per-screen list; the entry's type keys the global table. */
673 const struct nvc0_hw_metric_query_cfg **queries =
674 nvc0_hw_metric_get_queries(screen);
675 const struct nvc0_hw_metric_cfg *cfg =
676 nvc0_hw_metric_get_cfg(queries[id]->type);
678 info->name = cfg->name;
679 info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);
680 info->type = cfg->type;
681 info->group_id = NVC0_HW_METRIC_QUERY_GROUP;