1 /******************************************************************************
3 * Copyright (C) 2011 - 2014 Xilinx, Inc. All rights reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * Use of the Software is limited solely to applications:
16 * (a) running on a Xilinx device, or
17 * (b) that interact with a Xilinx device through a bus or interconnect.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * XILINX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
24 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 * Except as contained in this notice, the name of the Xilinx shall not be used
28 * in advertising or otherwise to promote the sale, use or other dealings in
29 * this Software without prior written authorization from Xilinx.
31 ******************************************************************************/
32 /*****************************************************************************/
37 * This header file contains APIs for configuring and controlling the Cortex-A9
38 * Performance Monitor Events.
39 * Cortex-A9 Performance Monitor has 6 event counters which can be used to
40 * count a variety of events described in Cortex-A9 TRM. This file defines
41 * configurations, where value configures the event counters to count a
44 * Xpm_SetEvents can be used to set the event counters to count a set of events
45 * and Xpm_GetEventCounters can be used to read the counter values.
49 * This file doesn't handle the Cortex-A9 cycle counter, as the cycle counter is
50 * being used for time keeping.
53 * MODIFICATION HISTORY:
55 * Ver Who Date Changes
56 * ----- ---- -------- -----------------------------------------------
57 * 1.00a sdm 07/11/11 First release
60 ******************************************************************************/
62 #ifndef XPMCOUNTER_H /* prevent circular inclusions */
63 #define XPMCOUNTER_H /* by using protection macros */
65 /***************************** Include Files ********************************/
68 #include "xpseudo_asm.h"
69 #include "xil_types.h"
73 #endif /* __cplusplus */
75 /************************** Constant Definitions ****************************/
77 /*
 * Number of performance counters. The file header describes the Cortex-A9
 * Performance Monitor as having 6 event counters; the cycle counter is not
 * handled by this file and is not part of this count.
 */
78 #define XPM_CTRCOUNT 6
80 /*
 * The following constants define the Cortex-A9 Performance Monitor Events.
 * This first group (0x00-0x02) covers the software increment event and the
 * instruction-fetch cache/TLB refill events.
 * NOTE(review): "INSRFETCH" and "INSTRFECT" in the macro names below look like
 * misspellings of "INSTRFETCH", but they are the public names (they are also
 * listed in the XPM_CNTRCFG1 and XPM_CNTRCFG11 tables later in this header),
 * so they are kept unchanged.
 */
83 * Software increment. The register is incremented only on writes to the
84 * Software Increment Register
86 #define XPM_EVENT_SOFTINCR 0x00
89 * Instruction fetch that causes a refill at (at least) the lowest level(s) of
90 * instruction or unified cache. Includes the speculative linefills in the
93 #define XPM_EVENT_INSRFETCH_CACHEREFILL 0x01
96 * Instruction fetch that causes a TLB refill at (at least) the lowest level of
97 * TLB. Includes the speculative requests in the count
99 #define XPM_EVENT_INSTRFECT_TLBREFILL 0x02
102 * Data read or write operation that causes a refill at (at least) the lowest
103 * level(s) of data or unified cache. Counts the number of allocations performed
104 * in the Data Cache due to a read or a write
/*
 * Data-side events 0x03-0x07: data cache refill, data cache access, data TLB
 * refill, data reads and data writes.
 * The next event defined in this header is 0x09; 0x08 has no constant here.
 */
106 #define XPM_EVENT_DATA_CACHEREFILL 0x03
109 * Data read or write operation that causes a cache access at (at least) the
110 * lowest level(s) of data or unified cache. This includes speculative reads
112 #define XPM_EVENT_DATA_CACHEACCESS 0x04
115 * Data read or write operation that causes a TLB refill at (at least) the
116 * lowest level of TLB. This does not include micro TLB misses due to PLD, PLI,
117 * CP15 Cache operation by MVA and CP15 VA to PA operations
119 #define XPM_EVENT_DATA_TLBREFILL 0x05
122 * Data read architecturally executed. Counts the number of data read
123 * instructions accepted by the Load Store Unit. This includes counting the
124 * speculative and aborted LDR/LDM, as well as the reads due to the SWP
127 #define XPM_EVENT_DATA_READS 0x06
130 * Data write architecturally executed. Counts the number of data write
131 * instructions accepted by the Load Store Unit. This includes counting the
132 * speculative and aborted STR/STM, as well as the writes due to the SWP
135 #define XPM_EVENT_DATA_WRITE 0x07
/*
 * Events 0x09-0x0D: exceptions taken and returned from, ContextID changes,
 * software changes of the PC, and immediate branches.
 * The next event defined in this header is 0x0F; 0x0E has no constant here.
 */
137 /* Exception taken. Counts the number of exceptions architecturally taken.*/
138 #define XPM_EVENT_EXCEPTION 0x09
140 /* Exception return architecturally executed.*/
141 #define XPM_EVENT_EXCEPRETURN 0x0A
144 * Change to ContextID retired. Counts the number of instructions
145 * architecturally executed writing into the ContextID Register
147 #define XPM_EVENT_CHANGECONTEXT 0x0B
150 * Software change of PC, except by an exception, architecturally executed.
151 * Count the number of PC changes architecturally executed, excluding the PC
152 * changes due to taken exceptions
154 #define XPM_EVENT_SW_CHANGEPC 0x0C
157 * Immediate branch architecturally executed (taken or not taken). This includes
158 * the branches which are flushed due to a previous load/store which aborts
161 #define XPM_EVENT_IMMEDBRANCH 0x0D
164 * Unaligned access architecturally executed. Counts the number of aborted
165 * unaligned accesses architecturally executed, and the number of not-aborted
166 * unaligned accesses, including the speculative ones
/*
 * Events 0x0F-0x12: unaligned accesses, mispredicted/not-predicted branches,
 * clock cycles outside WFE/WFI, and predictable branches.
 * The next event defined in this header after 0x12 is 0x40.
 */
168 #define XPM_EVENT_UNALIGNEDACCESS 0x0F
171 * Branch mispredicted/not predicted. Counts the number of mispredicted or
172 * not-predicted branches executed. This includes the branches which are flushed
173 * due to a previous load/store which aborts late
175 #define XPM_EVENT_BRANCHMISS 0x10
178 * Counts clock cycles when the Cortex-A9 processor is not in WFE/WFI. This
179 * event is not exported on the PMUEVENT bus
181 #define XPM_EVENT_CLOCKCYCLES 0x11
184 * Branches or other change in program flow that could have been predicted by
185 * the branch prediction resources of the processor. This includes the branches
186 * which are flushed due to a previous load/store which aborts late
188 #define XPM_EVENT_BRANCHPREDICT 0x12
/*
 * Java/Jazelle events 0x40-0x42: Java bytecodes decoded, software Java
 * bytecodes decoded, and Jazelle backward (taken) branches.
 */
191 * Java bytecode execute. Counts the number of Java bytecodes being decoded,
192 * including speculative ones
194 #define XPM_EVENT_JAVABYTECODE 0x40
197 * Software Java bytecode executed. Counts the number of software java bytecodes
198 * being decoded, including speculative ones
200 #define XPM_EVENT_SWJAVABYTECODE 0x41
203 * Jazelle backward branches executed. Counts the number of Jazelle taken
204 * branches being executed. This includes the branches which are flushed due
205 * to a previous load/store which aborts late
207 #define XPM_EVENT_JAVABACKBRANCH 0x42
210 * Coherent linefill miss. Counts the number of coherent linefill requests
211 * performed by the Cortex-A9 processor which also miss in all the other
212 * Cortex-A9 processors, meaning that the request is sent to the external
/*
 * Coherent linefill events 0x50-0x51: linefill requests that miss in all the
 * other Cortex-A9 processors (MISS) versus requests that hit in another
 * Cortex-A9 processor (HIT).
 */
215 #define XPM_EVENT_COHERLINEMISS 0x50
218 * Coherent linefill hit. Counts the number of coherent linefill requests
219 * performed by the Cortex-A9 processor which hit in another Cortex-A9
220 * processor, meaning that the linefill data is fetched directly from the
221 * relevant Cortex-A9 cache
223 #define XPM_EVENT_COHERLINEHIT 0x51
/*
 * Events 0x60-0x67: instruction-cache, data-cache and main-TLB dependent stall
 * cycles, STREX instruction counts, data evictions, and issue-stage
 * no-dispatch/empty cycles.
 * NOTE(review): the description lines visible for XPM_EVENT_STREXPASS and
 * XPM_EVENT_STREXFAIL are identical; the passed/failed distinction is taken
 * from the macro names -- confirm against the Cortex-A9 TRM.
 */
226 * Instruction cache dependent stall cycles. Counts the number of cycles where
227 * the processor is ready to accept new instructions, but does not receive any
228 * due to the instruction side not being able to provide any and the
229 * instruction cache is currently performing at least one linefill
231 #define XPM_EVENT_INSTRSTALL 0x60
234 * Data cache dependent stall cycles. Counts the number of cycles where the core
235 * has some instructions that it cannot issue to any pipeline, and the Load
236 * Store unit has at least one pending linefill request, and no pending
238 #define XPM_EVENT_DATASTALL 0x61
241 * Main TLB miss stall cycles. Counts the number of cycles where the processor
242 * is stalled waiting for the completion of translation table walks from the
243 * main TLB. The processor stalls can be due to the instruction side not being
244 * able to provide the instructions, or to the data side not being able to
245 * provide the necessary data, due to them waiting for the main TLB translation
246 * table walk to complete
248 #define XPM_EVENT_MAINTLBSTALL 0x62
251 * Counts the number of STREX instructions architecturally executed and
254 #define XPM_EVENT_STREXPASS 0x63
257 * Counts the number of STREX instructions architecturally executed and
260 #define XPM_EVENT_STREXFAIL 0x64
263 * Data eviction. Counts the number of eviction requests due to a linefill in
266 #define XPM_EVENT_DATAEVICT 0x65
269 * Counts the number of cycles where the issue stage does not dispatch any
270 * instruction because it is empty or cannot dispatch any instructions
272 #define XPM_EVENT_NODISPATCH 0x66
275 * Counts the number of cycles where the issue stage is empty
277 #define XPM_EVENT_ISSUEEMPTY 0x67
/*
 * Events 0x68-0x74: instructions going through register renaming (all,
 * floating-point and NEON), procedure returns, and instructions executed in
 * the main, second and Load/Store pipelines.
 * Values 0x69-0x6D and 0x6F have no constant in this header.
 */
280 * Counts the number of instructions going through the Register Renaming stage.
281 * This number is an approximate number of the total number of instructions
282 * speculatively executed, and even more approximate of the total number of
283 * instructions architecturally executed. The approximation depends mainly on
284 * the branch misprediction rate.
285 * The renaming stage can handle two instructions in the same cycle so the event
287 * - b00 no instructions renamed
288 * - b01 one instruction renamed
289 * - b10 two instructions renamed
291 #define XPM_EVENT_INSTRRENAME 0x68
294 * Counts the number of procedure returns whose condition codes do not fail,
295 * excluding all returns from exception. This count includes procedure returns
296 * which are flushed due to a previous load/store which aborts late.
297 * Only the following instructions are reported:
301 * - LDR pc,[sp],#offset
302 * The following instructions are not reported:
303 * - LDMIA R9!,{..,PC} (ThumbEE state only)
304 * - LDR PC,[R9],#offset (ThumbEE state only)
305 * - BX R0 (Rm != R14)
306 * - MOV PC,R0 (Rm != R14)
307 * - LDM SP,{...,PC} (writeback not specified)
308 * - LDR PC,[SP,#offset] (wrong addressing mode)
310 #define XPM_EVENT_PREDICTFUNCRET 0x6E
313 * Counts the number of instructions being executed in the main execution
314 * pipeline of the processor, the multiply pipeline and arithmetic logic unit
315 * pipeline. The counted instructions are still speculative
317 #define XPM_EVENT_MAINEXEC 0x70
320 * Counts the number of instructions being executed in the processor second
321 * execution pipeline (ALU). The counted instructions are still speculative
323 #define XPM_EVENT_SECEXEC 0x71
326 * Counts the number of instructions being executed in the Load/Store unit. The
327 * counted instructions are still speculative
329 #define XPM_EVENT_LDRSTR 0x72
332 * Counts the number of Floating-point instructions going through the Register
333 * Rename stage. Instructions are still speculative in this stage.
334 *Two floating-point instructions can be renamed in the same cycle so the event
336 *0b00 no floating-point instruction renamed
337 *0b01 one floating-point instruction renamed
338 *0b10 two floating-point instructions renamed
340 #define XPM_EVENT_FLOATRENAME 0x73
343 * Counts the number of Neon instructions going through the Register Rename
344 * stage.Instructions are still speculative in this stage.
345 * Two NEON instructions can be renamed in the same cycle so the event is two
347 *0b00 no NEON instruction renamed
348 *0b01 one NEON instruction renamed
349 *0b10 two NEON instructions renamed
351 #define XPM_EVENT_NEONRENAME 0x74
/*
 * Events 0x80-0x8B: stall-cycle counts (PLD slots, writes, main and micro TLB
 * misses, DMB) followed by clock-enabled cycle counts for the integer core
 * and the Data Engine.
 * Values 0x87-0x89 have no constant in this header.
 */
354 * Counts the number of cycles where the processor is stalled because PLD slots
357 #define XPM_EVENT_PLDSTALL 0x80
360 * Counts the number of cycles when the processor is stalled and the data side
361 * is stalled too because it is full and executing writes to the external
364 #define XPM_EVENT_WRITESTALL 0x81
367 * Counts the number of stall cycles due to main TLB misses on requests issued
368 * by the instruction side
370 #define XPM_EVENT_INSTRTLBSTALL 0x82
373 * Counts the number of stall cycles due to main TLB misses on requests issued
376 #define XPM_EVENT_DATATLBSTALL 0x83
379 * Counts the number of stall cycles due to micro TLB misses on the instruction
380 * side. This event does not include main TLB miss stall cycles that are already
381 * counted in the corresponding main TLB event
383 #define XPM_EVENT_INSTR_uTLBSTALL 0x84
386 * Counts the number of stall cycles due to micro TLB misses on the data side.
387 * This event does not include main TLB miss stall cycles that are already
388 * counted in the corresponding main TLB event
390 #define XPM_EVENT_DATA_uTLBSTALL 0x85
393 * Counts the number of stall cycles because of the execution of a DMB memory
394 * barrier. This includes all DMB instructions being executed, even
397 #define XPM_EVENT_DMB_STALL 0x86
400 * Counts the number of cycles during which the integer core clock is enabled
402 #define XPM_EVENT_INT_CLKEN 0x8A
405 * Counts the number of cycles during which the Data Engine clock is enabled
407 #define XPM_EVENT_DE_CLKEN 0x8B
/*
 * Events 0x90-0x93: ISB, DSB and DMB instruction counts, and external
 * interrupts. Note that, per the descriptions below, ISB and DSB are counted
 * when architecturally executed while DMB is counted when speculatively
 * executed.
 */
410 * Counts the number of ISB instructions architecturally executed
412 #define XPM_EVENT_INSTRISB 0x90
415 * Counts the number of DSB instructions architecturally executed
417 #define XPM_EVENT_INSTRDSB 0x91
420 * Counts the number of DMB instructions speculatively executed
422 #define XPM_EVENT_INSTRDMB 0x92
425 * Counts the number of external interrupts executed by the processor
427 #define XPM_EVENT_EXTINT 0x93
/*
 * PLE events 0xA0-0xA5.
 * NOTE(review): "PLE" is not expanded anywhere in this header; see the
 * Cortex-A9 TRM for the unit these events belong to.
 */
430 * PLE cache line request completed
432 #define XPM_EVENT_PLE_LRC 0xA0
435 * PLE cache line request skipped
437 #define XPM_EVENT_PLE_LRS 0xA1
/*
 * NOTE(review): no description is visible for this event here; judging by the
 * name it is presumably a PLE flush event -- confirm against the TRM.
 */
442 #define XPM_EVENT_PLE_FLUSH 0xA2
445 * PLE request complete
447 #define XPM_EVENT_PLE_CMPL 0xA3
/*
 * NOTE(review): no description is visible for this event here; judging by the
 * name it is presumably a PLE overflow event -- confirm against the TRM.
 */
452 #define XPM_EVENT_PLE_OVFL 0xA4
455 * PLE request programmed
457 #define XPM_EVENT_PLE_PROG 0xA5
460 * The following constants define the configurations for Cortex-A9 Performance
461 * Monitor Events. Each configuration configures the event counters for a set
463 * -----------------------------------------------
464 * Config PmCtr0... PmCtr5
465 * -----------------------------------------------
466 * XPM_CNTRCFG1 { XPM_EVENT_SOFTINCR,
467 * XPM_EVENT_INSRFETCH_CACHEREFILL,
468 * XPM_EVENT_INSTRFECT_TLBREFILL,
469 * XPM_EVENT_DATA_CACHEREFILL,
470 * XPM_EVENT_DATA_CACHEACCESS,
471 * XPM_EVENT_DATA_TLBREFILL }
473 * XPM_CNTRCFG2 { XPM_EVENT_DATA_READS,
474 * XPM_EVENT_DATA_WRITE,
475 * XPM_EVENT_EXCEPTION,
476 * XPM_EVENT_EXCEPRETURN,
477 * XPM_EVENT_CHANGECONTEXT,
478 * XPM_EVENT_SW_CHANGEPC }
480 * XPM_CNTRCFG3 { XPM_EVENT_IMMEDBRANCH,
481 * XPM_EVENT_UNALIGNEDACCESS,
482 * XPM_EVENT_BRANCHMISS,
483 * XPM_EVENT_CLOCKCYCLES,
484 * XPM_EVENT_BRANCHPREDICT,
485 * XPM_EVENT_JAVABYTECODE }
487 * XPM_CNTRCFG4 { XPM_EVENT_SWJAVABYTECODE,
488 * XPM_EVENT_JAVABACKBRANCH,
489 * XPM_EVENT_COHERLINEMISS,
490 * XPM_EVENT_COHERLINEHIT,
491 * XPM_EVENT_INSTRSTALL,
492 * XPM_EVENT_DATASTALL }
494 * XPM_CNTRCFG5 { XPM_EVENT_MAINTLBSTALL,
495 * XPM_EVENT_STREXPASS,
496 * XPM_EVENT_STREXFAIL,
497 * XPM_EVENT_DATAEVICT,
498 * XPM_EVENT_NODISPATCH,
499 * XPM_EVENT_ISSUEEMPTY }
501 * XPM_CNTRCFG6 { XPM_EVENT_INSTRRENAME,
502 * XPM_EVENT_PREDICTFUNCRET,
503 * XPM_EVENT_MAINEXEC,
506 * XPM_EVENT_FLOATRENAME }
508 * XPM_CNTRCFG7 { XPM_EVENT_NEONRENAME,
509 * XPM_EVENT_PLDSTALL,
510 * XPM_EVENT_WRITESTALL,
511 * XPM_EVENT_INSTRTLBSTALL,
512 * XPM_EVENT_DATATLBSTALL,
513 * XPM_EVENT_INSTR_uTLBSTALL }
515 * XPM_CNTRCFG8 { XPM_EVENT_DATA_uTLBSTALL,
516 * XPM_EVENT_DMB_STALL,
517 * XPM_EVENT_INT_CLKEN,
518 * XPM_EVENT_DE_CLKEN,
519 * XPM_EVENT_INSTRISB,
520 * XPM_EVENT_INSTRDSB }
522 * XPM_CNTRCFG9 { XPM_EVENT_INSTRDMB,
526 * XPM_EVENT_PLE_FLUSH,
527 * XPM_EVENT_PLE_CMPL }
529 * XPM_CNTRCFG10 { XPM_EVENT_PLE_OVFL,
530 * XPM_EVENT_PLE_PROG,
533 * XPM_EVENT_PLE_FLUSH,
534 * XPM_EVENT_PLE_CMPL }
536 * XPM_CNTRCFG11 { XPM_EVENT_DATASTALL,
537 * XPM_EVENT_INSRFETCH_CACHEREFILL,
538 * XPM_EVENT_INSTRFECT_TLBREFILL,
539 * XPM_EVENT_DATA_CACHEREFILL,
540 * XPM_EVENT_DATA_CACHEACCESS,
541 * XPM_EVENT_DATA_TLBREFILL }
/*
 * Identifiers for the event-counter configurations listed in the table above
 * (which events each configuration assigns to PmCtr0...PmCtr5).
 * The values are consecutive and zero-based, so each value is one less than
 * the number in the macro name: XPM_CNTRCFG1 is 0 and XPM_CNTRCFG11 is 10.
 * NOTE(review): presumably these are the values passed as PmcrCfg to
 * Xpm_SetEvents() -- confirm against the implementation.
 */
543 #define XPM_CNTRCFG1 0
544 #define XPM_CNTRCFG2 1
545 #define XPM_CNTRCFG3 2
546 #define XPM_CNTRCFG4 3
547 #define XPM_CNTRCFG5 4
548 #define XPM_CNTRCFG6 5
549 #define XPM_CNTRCFG7 6
550 #define XPM_CNTRCFG8 7
551 #define XPM_CNTRCFG9 8
552 #define XPM_CNTRCFG10 9
553 #define XPM_CNTRCFG11 10
555 /**************************** Type Definitions ******************************/
557 /***************** Macros (Inline Functions) Definitions ********************/
559 /************************** Variable Definitions ****************************/
561 /************************** Function Prototypes *****************************/
563 /* Interface functions to access performance counters from abstraction layer */
/*
 * Xpm_SetEvents: per the file header, sets the event counters to count a set
 * of events.
 * NOTE(review): PmcrCfg presumably takes one of the XPM_CNTRCFG* values
 * defined in this header; the implementation is not visible here -- confirm.
 */
564 void Xpm_SetEvents(int PmcrCfg);
/*
 * Xpm_GetEventCounters: per the file header, reads the counter values.
 * NOTE(review): PmCtrValue presumably must point to storage for XPM_CTRCOUNT
 * u32 values (one per event counter); the implementation is not visible
 * here -- confirm before relying on it.
 */
565 void Xpm_GetEventCounters(u32 *PmCtrValue);