OSDN Git Service

nvc0: display some performance metrics with a percentage
[android-x86/external-mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query_hw_metric.c
1 /*
2  * Copyright 2015 Samuel Pitoiset
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_query_hw_metric.h"
25 #include "nvc0/nvc0_query_hw_sm.h"
26
27 #define _Q(i,n,t) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t }
28 struct nvc0_hw_metric_cfg {
29    unsigned id;
30    const char *name;
31    enum pipe_driver_query_type type;
32 } nvc0_hw_metric_queries[] = {
33    _Q(ACHIEVED_OCCUPANCY,        "metric-achieved_occupancy",     PERCENTAGE  ),
34    _Q(BRANCH_EFFICIENCY,         "metric-branch_efficiency",      PERCENTAGE  ),
35    _Q(INST_ISSUED,               "metric-inst_issued",            UINT64      ),
36    _Q(INST_PER_WRAP,             "metric-inst_per_wrap",          UINT64      ),
37    _Q(INST_REPLAY_OVERHEAD,      "metric-inst_replay_overhead",   UINT64      ),
38    _Q(ISSUED_IPC,                "metric-issued_ipc",             UINT64      ),
39    _Q(ISSUE_SLOTS,               "metric-issue_slots",            UINT64      ),
40    _Q(ISSUE_SLOT_UTILIZATION,    "metric-issue_slot_utilization", PERCENTAGE  ),
41    _Q(IPC,                       "metric-ipc",                    UINT64      ),
42    _Q(SHARED_REPLAY_OVERHEAD,    "metric-shared_replay_overhead", UINT64      ),
43 };
44
45 #undef _Q
46
47 static inline const struct nvc0_hw_metric_cfg *
48 nvc0_hw_metric_get_cfg(unsigned metric_id)
49 {
50    unsigned i;
51
52    for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) {
53       if (nvc0_hw_metric_queries[i].id == metric_id)
54          return &nvc0_hw_metric_queries[i];
55    }
56    assert(0);
57    return NULL;
58 }
59
/* Recipe for one metric on one chipset family: which SM counter queries have
 * to be created so that the metric can be computed from their results. */
struct nvc0_hw_metric_query_cfg {
   /* NVC0_HW_METRIC_QUERY_* identifier of the metric. */
   unsigned type;
   /* Query types handed to nvc0_hw_sm_create_query(), in the order the
    * calc_result helpers index their res64[] argument. */
   uint32_t queries[8];
   /* Number of valid entries at the start of queries[]. */
   uint32_t num_queries;
};

/* Shorthand for the query type of the SM counter named n. */
#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
67
68 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
69 static const struct nvc0_hw_metric_query_cfg
70 sm20_achieved_occupancy =
71 {
72    .type        = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
73    .queries[0]  = _SM(ACTIVE_WARPS),
74    .queries[1]  = _SM(ACTIVE_CYCLES),
75    .num_queries = 2,
76 };
77
78 static const struct nvc0_hw_metric_query_cfg
79 sm20_branch_efficiency =
80 {
81    .type        = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
82    .queries[0]  = _SM(BRANCH),
83    .queries[1]  = _SM(DIVERGENT_BRANCH),
84    .num_queries = 2,
85 };
86
87 static const struct nvc0_hw_metric_query_cfg
88 sm20_inst_per_wrap =
89 {
90    .type        = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
91    .queries[0]  = _SM(INST_EXECUTED),
92    .queries[1]  = _SM(WARPS_LAUNCHED),
93    .num_queries = 2,
94 };
95
96 static const struct nvc0_hw_metric_query_cfg
97 sm20_inst_replay_overhead =
98 {
99    .type        = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
100    .queries[0]  = _SM(INST_ISSUED),
101    .queries[1]  = _SM(INST_EXECUTED),
102    .num_queries = 2,
103 };
104
105 static const struct nvc0_hw_metric_query_cfg
106 sm20_issued_ipc =
107 {
108    .type        = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
109    .queries[0]  = _SM(INST_ISSUED),
110    .queries[1]  = _SM(ACTIVE_CYCLES),
111    .num_queries = 2,
112 };
113
114 static const struct nvc0_hw_metric_query_cfg
115 sm20_issue_slot_utilization =
116 {
117    .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
118    .queries[0]  = _SM(INST_ISSUED),
119    .queries[1]  = _SM(ACTIVE_CYCLES),
120    .num_queries = 2,
121 };
122
123 static const struct nvc0_hw_metric_query_cfg
124 sm20_ipc =
125 {
126    .type        = NVC0_HW_METRIC_QUERY_IPC,
127    .queries[0]  = _SM(INST_EXECUTED),
128    .queries[1]  = _SM(ACTIVE_CYCLES),
129    .num_queries = 2,
130 };
131
132 static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
133 {
134    &sm20_achieved_occupancy,
135    &sm20_branch_efficiency,
136    &sm20_inst_per_wrap,
137    &sm20_inst_replay_overhead,
138    &sm20_issued_ipc,
139    &sm20_issue_slot_utilization,
140    &sm20_ipc,
141 };
142
143 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
144 static const struct nvc0_hw_metric_query_cfg
145 sm21_inst_issued =
146 {
147    .type        = NVC0_HW_METRIC_QUERY_INST_ISSUED,
148    .queries[0]  = _SM(INST_ISSUED1_0),
149    .queries[1]  = _SM(INST_ISSUED1_1),
150    .queries[2]  = _SM(INST_ISSUED2_0),
151    .queries[3]  = _SM(INST_ISSUED2_1),
152    .num_queries = 4,
153 };
154
155 static const struct nvc0_hw_metric_query_cfg
156 sm21_inst_replay_overhead =
157 {
158    .type        = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
159    .queries[0]  = _SM(INST_ISSUED1_0),
160    .queries[1]  = _SM(INST_ISSUED1_1),
161    .queries[2]  = _SM(INST_ISSUED2_0),
162    .queries[3]  = _SM(INST_ISSUED2_1),
163    .queries[4]  = _SM(INST_EXECUTED),
164    .num_queries = 5,
165 };
166
167 static const struct nvc0_hw_metric_query_cfg
168 sm21_issued_ipc =
169 {
170    .type        = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
171    .queries[0]  = _SM(INST_ISSUED1_0),
172    .queries[1]  = _SM(INST_ISSUED1_1),
173    .queries[2]  = _SM(INST_ISSUED2_0),
174    .queries[3]  = _SM(INST_ISSUED2_1),
175    .queries[4]  = _SM(ACTIVE_CYCLES),
176    .num_queries = 5,
177 };
178
179 static const struct nvc0_hw_metric_query_cfg
180 sm21_issue_slots =
181 {
182    .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
183    .queries[0]  = _SM(INST_ISSUED1_0),
184    .queries[1]  = _SM(INST_ISSUED1_1),
185    .queries[2]  = _SM(INST_ISSUED2_0),
186    .queries[3]  = _SM(INST_ISSUED2_1),
187    .num_queries = 4,
188 };
189
190 static const struct nvc0_hw_metric_query_cfg
191 sm21_issue_slot_utilization =
192 {
193    .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
194    .queries[0]  = _SM(INST_ISSUED1_0),
195    .queries[1]  = _SM(INST_ISSUED1_1),
196    .queries[2]  = _SM(INST_ISSUED2_0),
197    .queries[3]  = _SM(INST_ISSUED2_1),
198    .queries[4]  = _SM(ACTIVE_CYCLES),
199    .num_queries = 5,
200 };
201
202 static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
203 {
204    &sm20_achieved_occupancy,
205    &sm20_branch_efficiency,
206    &sm21_inst_issued,
207    &sm20_inst_per_wrap,
208    &sm21_inst_replay_overhead,
209    &sm21_issued_ipc,
210    &sm21_issue_slots,
211    &sm21_issue_slot_utilization,
212    &sm20_ipc,
213 };
214
215 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
216 static const struct nvc0_hw_metric_query_cfg
217 sm30_achieved_occupancy =
218 {
219    .type        = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
220    .queries[0]  = _SM(ACTIVE_WARPS),
221    .queries[1]  = _SM(ACTIVE_CYCLES),
222    .num_queries = 2,
223 };
224
225 static const struct nvc0_hw_metric_query_cfg
226 sm30_branch_efficiency =
227 {
228    .type        = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
229    .queries[0]  = _SM(BRANCH),
230    .queries[1]  = _SM(DIVERGENT_BRANCH),
231    .num_queries = 2,
232 };
233
234 static const struct nvc0_hw_metric_query_cfg
235 sm30_inst_issued =
236 {
237    .type        = NVC0_HW_METRIC_QUERY_INST_ISSUED,
238    .queries[0]  = _SM(INST_ISSUED1),
239    .queries[1]  = _SM(INST_ISSUED2),
240    .num_queries = 2,
241 };
242
243 static const struct nvc0_hw_metric_query_cfg
244 sm30_inst_per_wrap =
245 {
246    .type        = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
247    .queries[0]  = _SM(INST_EXECUTED),
248    .queries[1]  = _SM(WARPS_LAUNCHED),
249    .num_queries = 2,
250 };
251
252 static const struct nvc0_hw_metric_query_cfg
253 sm30_inst_replay_overhead =
254 {
255    .type        = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
256    .queries[0]  = _SM(INST_ISSUED1),
257    .queries[1]  = _SM(INST_ISSUED2),
258    .queries[2]  = _SM(INST_EXECUTED),
259    .num_queries = 3,
260 };
261
262 static const struct nvc0_hw_metric_query_cfg
263 sm30_issued_ipc =
264 {
265    .type        = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
266    .queries[0]  = _SM(INST_ISSUED1),
267    .queries[1]  = _SM(INST_ISSUED2),
268    .queries[2]  = _SM(ACTIVE_CYCLES),
269    .num_queries = 3,
270 };
271
272 static const struct nvc0_hw_metric_query_cfg
273 sm30_issue_slots =
274 {
275    .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
276    .queries[0]  = _SM(INST_ISSUED1),
277    .queries[1]  = _SM(INST_ISSUED2),
278    .num_queries = 2,
279 };
280
281 static const struct nvc0_hw_metric_query_cfg
282 sm30_issue_slot_utilization =
283 {
284    .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
285    .queries[0]  = _SM(INST_ISSUED1),
286    .queries[1]  = _SM(INST_ISSUED2),
287    .queries[2]  = _SM(ACTIVE_CYCLES),
288    .num_queries = 3,
289 };
290
291 static const struct nvc0_hw_metric_query_cfg
292 sm30_ipc =
293 {
294    .type        = NVC0_HW_METRIC_QUERY_IPC,
295    .queries[0]  = _SM(INST_EXECUTED),
296    .queries[1]  = _SM(ACTIVE_CYCLES),
297    .num_queries = 2,
298 };
299
300 static const struct nvc0_hw_metric_query_cfg
301 sm30_shared_replay_overhead =
302 {
303    .type        = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
304    .queries[0]  = _SM(SHARED_LD_REPLAY),
305    .queries[1]  = _SM(SHARED_ST_REPLAY),
306    .queries[2]  = _SM(INST_EXECUTED),
307    .num_queries = 3,
308 };
309
310 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
311 {
312    &sm30_achieved_occupancy,
313    &sm30_branch_efficiency,
314    &sm30_inst_issued,
315    &sm30_inst_per_wrap,
316    &sm30_inst_replay_overhead,
317    &sm30_issued_ipc,
318    &sm30_issue_slots,
319    &sm30_issue_slot_utilization,
320    &sm30_ipc,
321    &sm30_shared_replay_overhead,
322 };
323
324 /* ==== Compute capability 3.5 (GK110) ==== */
325 static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
326 {
327    &sm30_achieved_occupancy,
328    &sm30_inst_issued,
329    &sm30_inst_per_wrap,
330    &sm30_inst_replay_overhead,
331    &sm30_issued_ipc,
332    &sm30_inst_issued,
333    &sm30_issue_slot_utilization,
334    &sm30_ipc,
335    &sm30_shared_replay_overhead,
336 };
337
338 #undef _SM
339
340 static inline const struct nvc0_hw_metric_query_cfg **
341 nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
342 {
343    struct nouveau_device *dev = screen->base.device;
344
345    switch (screen->base.class_3d) {
346    case NVF0_3D_CLASS:
347       return sm35_hw_metric_queries;
348    case NVE4_3D_CLASS:
349       return sm30_hw_metric_queries;
350    default:
351       if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
352          return sm20_hw_metric_queries;
353       return sm21_hw_metric_queries;
354    }
355    assert(0);
356    return NULL;
357 }
358
359 unsigned
360 nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)
361 {
362    struct nouveau_device *dev = screen->base.device;
363
364    switch (screen->base.class_3d) {
365    case NVF0_3D_CLASS:
366       return ARRAY_SIZE(sm35_hw_metric_queries);
367    case NVE4_3D_CLASS:
368       return ARRAY_SIZE(sm30_hw_metric_queries);
369    default:
370       if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
371          return ARRAY_SIZE(sm20_hw_metric_queries);
372       return ARRAY_SIZE(sm21_hw_metric_queries);
373    }
374    return 0;
375 }
376
377 static const struct nvc0_hw_metric_query_cfg *
378 nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
379 {
380    const struct nvc0_hw_metric_query_cfg **queries;
381    struct nvc0_screen *screen = nvc0->screen;
382    struct nvc0_query *q = &hq->base;
383    unsigned num_queries;
384    unsigned i;
385
386    num_queries = nvc0_hw_metric_get_num_queries(screen);
387    queries = nvc0_hw_metric_get_queries(screen);
388
389    for (i = 0; i < num_queries; i++) {
390       if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)
391          return queries[i];
392    }
393    assert(0);
394    return NULL;
395 }
396
397 static void
398 nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
399                              struct nvc0_hw_query *hq)
400 {
401    struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
402    unsigned i;
403
404    for (i = 0; i < hmq->num_queries; i++)
405       if (hmq->queries[i]->funcs->destroy_query)
406          hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
407    FREE(hmq);
408 }
409
410 static boolean
411 nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
412 {
413    struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
414    boolean ret = false;
415    unsigned i;
416
417    for (i = 0; i < hmq->num_queries; i++) {
418       ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
419       if (!ret)
420          return ret;
421    }
422    return ret;
423 }
424
425 static void
426 nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
427 {
428    struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
429    unsigned i;
430
431    for (i = 0; i < hmq->num_queries; i++)
432       hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
433 }
434
435 static uint64_t
436 sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
437 {
438    switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
439    case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
440       /* (active_warps / active_cycles) / max. number of warps on a MP */
441       if (res64[1])
442          return (res64[0] / (double)res64[1]) / 48;
443       break;
444    case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
445       /* (branch / (branch + divergent_branch)) * 100 */
446       if (res64[0] + res64[1])
447          return (res64[0] / (double)(res64[0] + res64[1])) * 100;
448       break;
449    case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
450       /* inst_executed / warps_launched */
451       if (res64[1])
452          return res64[0] / (double)res64[1];
453       break;
454    case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
455       /* (inst_issued - inst_executed) / inst_executed */
456       if (res64[1])
457          return (res64[0] - res64[1]) / (double)res64[1];
458       break;
459    case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
460       /* inst_issued / active_cycles */
461       if (res64[1])
462          return res64[0] / (double)res64[1];
463       break;
464    case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
465       /* ((inst_issued / 2) / active_cycles) * 100 */
466       if (res64[1])
467          return ((res64[0] / 2) / (double)res64[1]) * 100;
468       break;
469    case NVC0_HW_METRIC_QUERY_IPC:
470       /* inst_executed / active_cycles */
471       if (res64[1])
472          return res64[0] / (double)res64[1];
473       break;
474    default:
475       debug_printf("invalid metric type: %d\n",
476                    hq->base.type - NVC0_HW_METRIC_QUERY(0));
477       break;
478    }
479    return 0;
480 }
481
482 static uint64_t
483 sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
484 {
485    switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
486    case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
487       return sm20_hw_metric_calc_result(hq, res64);
488    case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
489       return sm20_hw_metric_calc_result(hq, res64);
490    case NVC0_HW_METRIC_QUERY_INST_ISSUED:
491       /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
492       return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
493       break;
494    case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
495       return sm20_hw_metric_calc_result(hq, res64);
496    case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
497       /* (metric-inst_issued - inst_executed) / inst_executed */
498       if (res64[4])
499          return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
500                    res64[4]) / (double)res64[4]);
501       break;
502    case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
503       /* metric-inst_issued / active_cycles */
504       if (res64[4])
505          return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
506                 (double)res64[4];
507       break;
508    case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
509       /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
510       return res64[0] + res64[1] + res64[2] + res64[3];
511       break;
512    case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
513       /* ((metric-issue_slots / 2) / active_cycles) * 100 */
514       if (res64[4])
515          return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
516                  (double)res64[4]) * 100;
517       break;
518    case NVC0_HW_METRIC_QUERY_IPC:
519       return sm20_hw_metric_calc_result(hq, res64);
520    default:
521       debug_printf("invalid metric type: %d\n",
522                    hq->base.type - NVC0_HW_METRIC_QUERY(0));
523       break;
524    }
525    return 0;
526 }
527
528 static uint64_t
529 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
530 {
531    switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
532    case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
533       /* (active_warps / active_cycles) / max. number of warps on a MP */
534       if (res64[1])
535          return (res64[0] / (double)res64[1]) / 64;
536       break;
537    case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
538       return sm20_hw_metric_calc_result(hq, res64);
539    case NVC0_HW_METRIC_QUERY_INST_ISSUED:
540       /* inst_issued1 + inst_issued2 * 2 */
541       return res64[0] + res64[1] * 2;
542    case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
543       return sm20_hw_metric_calc_result(hq, res64);
544    case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
545       /* (metric-inst_issued - inst_executed) / inst_executed */
546       if (res64[2])
547          return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
548       break;
549    case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
550       /* metric-inst_issued / active_cycles */
551       if (res64[2])
552          return (res64[0] + res64[1] * 2) / (double)res64[2];
553       break;
554    case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
555       /* inst_issued1 + inst_issued2 */
556       return res64[0] + res64[1];
557    case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
558       /* ((metric-issue_slots / 2) / active_cycles) * 100 */
559       if (res64[2])
560          return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
561       break;
562    case NVC0_HW_METRIC_QUERY_IPC:
563       return sm20_hw_metric_calc_result(hq, res64);
564    case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
565       /* (shared_load_replay + shared_store_replay) / inst_executed */
566       if (res64[2])
567          return (res64[0] + res64[1]) / (double)res64[2];
568       break;
569    default:
570       debug_printf("invalid metric type: %d\n",
571                    hq->base.type - NVC0_HW_METRIC_QUERY(0));
572       break;
573    }
574    return 0;
575 }
576
577 static boolean
578 nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
579                                 struct nvc0_hw_query *hq, boolean wait,
580                                 union pipe_query_result *result)
581 {
582    struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
583    struct nvc0_screen *screen = nvc0->screen;
584    struct nouveau_device *dev = screen->base.device;
585    union pipe_query_result results[8] = {};
586    uint64_t res64[8] = {};
587    uint64_t value = 0;
588    boolean ret = false;
589    unsigned i;
590
591    for (i = 0; i < hmq->num_queries; i++) {
592       ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
593                                                      wait, &results[i]);
594       if (!ret)
595          return ret;
596       res64[i] = *(uint64_t *)&results[i];
597    }
598
599    switch (screen->base.class_3d) {
600    case NVF0_3D_CLASS:
601    case NVE4_3D_CLASS:
602       value = sm30_hw_metric_calc_result(hq, res64);
603       break;
604    default:
605       if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
606          value = sm20_hw_metric_calc_result(hq, res64);
607       else
608          value = sm21_hw_metric_calc_result(hq, res64);
609       break;
610    }
611
612    *(uint64_t *)result = value;
613    return ret;
614 }
615
616 static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
617    .destroy_query = nvc0_hw_metric_destroy_query,
618    .begin_query = nvc0_hw_metric_begin_query,
619    .end_query = nvc0_hw_metric_end_query,
620    .get_query_result = nvc0_hw_metric_get_query_result,
621 };
622
623 struct nvc0_hw_query *
624 nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
625 {
626    const struct nvc0_hw_metric_query_cfg *cfg;
627    struct nvc0_hw_metric_query *hmq;
628    struct nvc0_hw_query *hq;
629    unsigned i;
630
631    if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
632       return NULL;
633
634    hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
635    if (!hmq)
636       return NULL;
637
638    hq = &hmq->base;
639    hq->funcs = &hw_metric_query_funcs;
640    hq->base.type = type;
641
642    cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
643
644    for (i = 0; i < cfg->num_queries; i++) {
645       hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
646       if (!hmq->queries[i]) {
647          nvc0_hw_metric_destroy_query(nvc0, hq);
648          return NULL;
649       }
650       hmq->num_queries++;
651    }
652
653    return hq;
654 }
655
656 int
657 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
658                                      struct pipe_driver_query_info *info)
659 {
660    int count = 0;
661
662    if (screen->base.drm->version >= 0x01000101) {
663       if (screen->compute)
664          count = nvc0_hw_metric_get_num_queries(screen);
665    }
666
667    if (!info)
668       return count;
669
670    if (id < count) {
671       if (screen->compute) {
672          if (screen->base.class_3d <= NVF0_3D_CLASS) {
673             const struct nvc0_hw_metric_query_cfg **queries =
674                nvc0_hw_metric_get_queries(screen);
675             const struct nvc0_hw_metric_cfg *cfg =
676                nvc0_hw_metric_get_cfg(queries[id]->type);
677
678             info->name = cfg->name;
679             info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);
680             info->type = cfg->type;
681             info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
682             return 1;
683          }
684       }
685    }
686    return 0;
687 }