optimization - Is gcc reordering my volatile variables? -
I am developing on an STM32F746 (ARM Cortex-M7) with the GNU ARM toolchain (GCC 5.4.1), using FreeRTOS. I am using the following set of CFLAGS:
-g -Werror -Wextra -O2 -fno-common -ffunction-sections -fmessage-length=0 -g -mcpu=cortex-m7 -mthumb -mfloat-abi=hard -mfpu=fpv5-sp-d16 -fno-common -ffunction-sections -fmessage-length=0 -g -mcpu=cortex-m7 -mthumb -mfloat-abi=hard -mfpu=fpv5-sp-d16
The following code shows '0' in the console, as if the DMA controller were not powered on. Normally, writing rcc_ahb1enr_dma1en to the AHB1ENR register powers on the DMA controller.
/* *** other peripheral configurations *** */ rcc->ahb1enr |= rcc_ahb1enr_dma1en; dma1_stream3->cr = dma_sxcr_pl | dma_sxcr_minc | dma_sxcr_tcie | dma_sxcr_teie | dma_sxcr_dmeie; debug_printf("%x\r\n", dma1_stream3->cr);
But the following code prints the value that was written to dma1_stream3->cr.
/* *** other peripheral configurations *** */ rcc->ahb1enr |= rcc_ahb1enr_dma1en; __asm__ __volatile__ ("" ::: "memory"); dma1_stream3->cr = dma_sxcr_pl | dma_sxcr_minc | dma_sxcr_tcie | dma_sxcr_teie | dma_sxcr_dmeie; debug_printf("%x\r\n", dma1_stream3->cr);
I never had to add memory barriers for other controllers, so why do I need one for this one? I am thinking of either load/store instructions being reordered by GCC or a timing issue. I checked the errata sheet of the CPU (STM32F746) and the datasheet, but I didn't find anything.
Here is the definition of the RCC structure:
typedef struct { /* ... */ __io uint32_t ahb1enr; /* ... */ } rcc_typedef;
And here is the definition of the dma1_stream3 structure type:
typedef struct { __io uint32_t cr; /*!< dma stream x configuration register */ __io uint32_t ndtr; /*!< dma stream x number of data register */ __io uint32_t par; /*!< dma stream x peripheral address register */ __io uint32_t m0ar; /*!< dma stream x memory 0 address register */ __io uint32_t m1ar; /*!< dma stream x memory 1 address register */ __io uint32_t fcr; /*!< dma stream x fifo control register */ } dma_stream_typedef;
__io is defined as volatile.
Here is the assembly code generated for the function:
08003f4c <bsp_initspiadis>: 8003f4c: 4956 ldr r1, [pc, #344] ; (80040a8 <bsp_initspiadis+0x15c>) 8003f4e: 4a57 ldr r2, [pc, #348] ; (80040ac <bsp_initspiadis+0x160>) 8003f50: 6b08 ldr r0, [r1, #48] ; 0x30 8003f52: f440 7080 orr.w r0, r0, #256 ; 0x100 8003f56: b5f8 push {r3, r4, r5, r6, r7, lr} 8003f58: 6308 str r0, [r1, #48] ; 0x30 8003f5a: 2500 movs r5, #0 8003f5c: 6810 ldr r0, [r2, #0] 8003f5e: f240 3607 movw r6, #775 ; 0x307 8003f62: 4b53 ldr r3, [pc, #332] ; (80040b0 <bsp_initspiadis+0x164>) 8003f64: f44f 5eb8 mov.w lr, #5888 ; 0x1700 8003f68: f020 000c bic.w r0, r0, #12 8003f6c: 4c51 ldr r4, [pc, #324] ; (80040b4 <bsp_initspiadis+0x168>) 8003f6e: 4f52 ldr r7, [pc, #328] ; (80040b8 <bsp_initspiadis+0x16c>) 8003f70: 6010 str r0, [r2, #0] 8003f72: 6810 ldr r0, [r2, #0] 8003f74: f040 0008 orr.w r0, r0, #8 8003f78: 6010 str r0, [r2, #0] 8003f7a: 6890 ldr r0, [r2, #8] 8003f7c: f020 000c bic.w r0, r0, #12 8003f80: 6090 str r0, [r2, #8] 8003f82: 6890 ldr r0, [r2, #8] 8003f84: f040 0004 orr.w r0, r0, #4 8003f88: 6090 str r0, [r2, #8] 8003f8a: 6890 ldr r0, [r2, #8] 8003f8c: f040 0008 orr.w r0, r0, #8 8003f90: 6090 str r0, [r2, #8] 8003f92: 6a10 ldr r0, [r2, #32] 8003f94: f020 00f0 bic.w r0, r0, #240 ; 0xf0 8003f98: 6210 str r0, [r2, #32] 8003f9a: 6a10 ldr r0, [r2, #32] 8003f9c: f040 0010 orr.w r0, r0, #16 8003fa0: 6210 str r0, [r2, #32] 8003fa2: 6a10 ldr r0, [r2, #32] 8003fa4: f040 0040 orr.w r0, r0, #64 ; 0x40 8003fa8: 6210 str r0, [r2, #32] 8003faa: 6b08 ldr r0, [r1, #48] ; 0x30 8003fac: 4a43 ldr r2, [pc, #268] ; (80040bc <bsp_initspiadis+0x170>) 8003fae: f040 0002 orr.w r0, r0, #2 8003fb2: 6308 str r0, [r1, #48] ; 0x30 8003fb4: 6818 ldr r0, [r3, #0] 8003fb6: f020 5040 bic.w r0, r0, #805306368 ; 0x30000000 8003fba: 6018 str r0, [r3, #0] 8003fbc: 6818 ldr r0, [r3, #0] 8003fbe: f040 5000 orr.w r0, r0, #536870912 ; 0x20000000 8003fc2: 6018 str r0, [r3, #0] 8003fc4: 6898 ldr r0, [r3, #8] 8003fc6: f020 5040 bic.w r0, r0, #805306368 ; 0x30000000 8003fca: 6098 str r0, [r3, 
#8] 8003fcc: 6898 ldr r0, [r3, #8] 8003fce: f040 5080 orr.w r0, r0, #268435456 ; 0x10000000 8003fd2: 6098 str r0, [r3, #8] 8003fd4: 6898 ldr r0, [r3, #8] 8003fd6: f040 5000 orr.w r0, r0, #536870912 ; 0x20000000 8003fda: 6098 str r0, [r3, #8] 8003fdc: 6a58 ldr r0, [r3, #36] ; 0x24 8003fde: f020 6070 bic.w r0, r0, #251658240 ; 0xf000000 8003fe2: 6258 str r0, [r3, #36] ; 0x24 8003fe4: 6a58 ldr r0, [r3, #36] ; 0x24 8003fe6: f040 7080 orr.w r0, r0, #16777216 ; 0x1000000 8003fea: 6258 str r0, [r3, #36] ; 0x24 8003fec: 6a58 ldr r0, [r3, #36] ; 0x24 8003fee: f040 6080 orr.w r0, r0, #67108864 ; 0x4000000 8003ff2: 6258 str r0, [r3, #36] ; 0x24 8003ff4: 6b08 ldr r0, [r1, #48] ; 0x30 8003ff6: f040 0002 orr.w r0, r0, #2 8003ffa: 6308 str r0, [r1, #48] ; 0x30 8003ffc: 6818 ldr r0, [r3, #0] 8003ffe: f020 4040 bic.w r0, r0, #3221225472 ; 0xc0000000 8004002: 6018 str r0, [r3, #0] 8004004: 6818 ldr r0, [r3, #0] 8004006: f040 4000 orr.w r0, r0, #2147483648 ; 0x80000000 800400a: 6018 str r0, [r3, #0] 800400c: 6898 ldr r0, [r3, #8] 800400e: f020 4040 bic.w r0, r0, #3221225472 ; 0xc0000000 8004012: 6098 str r0, [r3, #8] 8004014: 6898 ldr r0, [r3, #8] 8004016: f040 4080 orr.w r0, r0, #1073741824 ; 0x40000000 800401a: 6098 str r0, [r3, #8] 800401c: 6898 ldr r0, [r3, #8] 800401e: f040 4000 orr.w r0, r0, #2147483648 ; 0x80000000 8004022: 6098 str r0, [r3, #8] 8004024: 6a58 ldr r0, [r3, #36] ; 0x24 8004026: f020 4070 bic.w r0, r0, #4026531840 ; 0xf0000000 800402a: 6258 str r0, [r3, #36] ; 0x24 800402c: 6a58 ldr r0, [r3, #36] ; 0x24 800402e: f040 5080 orr.w r0, r0, #268435456 ; 0x10000000 8004032: 6258 str r0, [r3, #36] ; 0x24 8004034: 6a58 ldr r0, [r3, #36] ; 0x24 8004036: f040 4080 orr.w r0, r0, #1073741824 ; 0x40000000 800403a: 6258 str r0, [r3, #36] ; 0x24 800403c: 6c0b ldr r3, [r1, #64] ; 0x40 800403e: 4820 ldr r0, [pc, #128] ; (80040c0 <bsp_initspiadis+0x174>) 8004040: f443 4380 orr.w r3, r3, #16384 ; 0x4000 8004044: 640b str r3, [r1, #64] ; 0x40 8004046: 61e5 str r5, [r4, #28] 8004048: 
6026 str r6, [r4, #0] 800404a: 6823 ldr r3, [r4, #0] 800404c: 4e1d ldr r6, [pc, #116] ; (80040c4 <bsp_initspiadis+0x178>) 800404e: f023 0338 bic.w r3, r3, #56 ; 0x38 8004052: 6023 str r3, [r4, #0] 8004054: 6823 ldr r3, [r4, #0] 8004056: f043 0338 orr.w r3, r3, #56 ; 0x38 800405a: 6023 str r3, [r4, #0] 800405c: f8c4 e004 str.w lr, [r4, #4] 8004060: 6b0b ldr r3, [r1, #48] ; 0x30 8004062: 4c19 ldr r4, [pc, #100] ; (80040c8 <bsp_initspiadis+0x17c>) 8004064: f443 1300 orr.w r3, r3, #2097152 ; 0x200000 8004068: 630b str r3, [r1, #48] ; 0x30 800406a: 603a str r2, [r7, #0] 800406c: 6839 ldr r1, [r7, #0] 800406e: f000 faab bl 80045c8 <printf> 8004072: 4b16 ldr r3, [pc, #88] ; (80040cc <bsp_initspiadis+0x180>) 8004074: 4629 mov r1, r5 8004076: 2203 movs r2, #3 8004078: 601c str r4, [r3, #0] 800407a: 2001 movs r0, #1 800407c: f7fe f90e bl 800229c <xqueuegenericcreate> 8004080: 4629 mov r1, r5 8004082: 6170 str r0, [r6, #20] 8004084: 2203 movs r2, #3 8004086: 2001 movs r0, #1 8004088: f44f 4480 mov.w r4, #16384 ; 0x4000 800408c: f7fe f906 bl 800229c <xqueuegenericcreate> 8004090: 4b0f ldr r3, [pc, #60] ; (80040d0 <bsp_initspiadis+0x184>) 8004092: 2240 movs r2, #64 ; 0x40 8004094: f44f 4100 mov.w r1, #32768 ; 0x8000 8004098: 6130 str r0, [r6, #16] 800409a: 601c str r4, [r3, #0] 800409c: 6019 str r1, [r3, #0] 800409e: f883 230e strb.w r2, [r3, #782] ; 0x30e 80040a2: f883 230f strb.w r2, [r3, #783] ; 0x30f 80040a6: bdf8 pop {r3, r4, r5, r6, r7, pc} 80040a8: 40023800 andmi r3, r2, r0, lsl #16 80040ac: 40022000 andmi r2, r2, r0 80040b0: 40020400 andmi r0, r2, r0, lsl #8 80040b4: 40003800 andmi r3, r0, r0, lsl #16 80040b8: 40026058 andmi r6, r2, r8, asr r0 80040bc: 00030416 andeq r0, r3, r6, lsl r4 80040c0: 08008ec8 stmdaeq r0, {r3, r6, r7, r9, sl, fp, pc} 80040c4: 2000c2d0 ldrdcs ip, [r0], -r0 80040c8: 00030456 andeq r0, r3, r6, asr r4 80040cc: 40026070 andmi r6, r2, r0, ror r0 80040d0: e000e100 , lr, r0, r0, lsl #2
Yes, there is a bug in both the STM32F4 and F7 micros: after the operation that enables a peripheral clock, a small delay is needed before the peripheral's registers can be accessed.
I use __dsb();
for it. You can provide any delay (it is described in the errata of the affected uCs; actually, I have not found an unaffected F4 or F7). I use DSB as advised in the F4 errata, and it also reminds me of the bug.
2.1.5 Delay after an RCC peripheral clock enabling. Description: A delay between an RCC peripheral clock enable and the effective peripheral enabling should be taken into account in order to manage the peripheral read/write to registers. This delay depends on the peripheral mapping: • If the peripheral is mapped on AHB: the delay should be equal to 2 AHB cycles. • If the peripheral is mapped on APB: the delay should be equal to 1 + (AHB/APB prescaler) cycles. Workarounds: 1. Use the DSB instruction to stall the Cortex®-M4 CPU pipeline until the instruction is completed. 2. Insert “n” NOPs between the RCC enable bit write and the peripheral register writes (n = 2 for AHB peripherals, n = 1 + AHB/APB prescaler in the case of APB peripherals). 3. Or insert a dummy read operation from the corresponding register after enabling the peripheral clock.
Comments
Post a Comment