]> git.sur5r.net Git - u-boot/blob - arch/arm/mach-sunxi/dram_sun9i.c
Merge branch 'master' of git://git.denx.de/u-boot-uniphier
[u-boot] / arch / arm / mach-sunxi / dram_sun9i.c
1 /*
2  * sun9i dram controller initialisation
3  *
4  * (C) Copyright 2007-2015
5  * Allwinner Technology Co., Ltd. <www.allwinnertech.com>
6  * Jerry Wang <wangflord@allwinnertech.com>
7  *
8  * (C) Copyright 2016 Theobroma Systems Design und Consulting GmbH
9  *                    Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
10  *
11  * SPDX-License-Identifier:     GPL-2.0+
12  */
13
14 #include <common.h>
15 #include <dm.h>
16 #include <errno.h>
17 #include <ram.h>
18 #include <asm/io.h>
19 #include <asm/arch/clock.h>
20 #include <asm/arch/dram.h>
21 #include <asm/arch/sys_proto.h>
22
23 DECLARE_GLOBAL_DATA_PTR;
24
25 #define DRAM_CLK (CONFIG_DRAM_CLK * 1000000)
26
27 /*
28  * The following amounts to an extensive rewrite of the code received from
29  * Allwinner as part of the open-source bootloader release (refer to
30  * https://github.com/allwinner-zh/bootloader.git) and augments the upstream
31  * sources (which act as the primary reference point for the inner workings
32  * of the 'underdocumented' DRAM controller in the A80) using the following
33  * documentation for other memory controllers based on the (Synopsys)
34  * Designware IP (DDR memory protocol controller and DDR PHY)
35  *   * TI Keystone II Architecture: DDR3 Memory Controller, User's Guide
36  *     Document 'SPRUHN7C', Oct 2013 (revised March 2015)
37  *   * Xilinx Zynq UltraScale+ MPSoC Register Reference
38  *     document ug1087 (v1.0)
39  * Note that the Zynq-documentation provides a very close match for the DDR
40  * memory protocol controller (and provides a very good guide to the rounding
41  * rules for various timings), whereas the TI Keystone II document should be
42  * referred to for DDR PHY specifics only.
43  *
44  * The DRAM controller in the A80 runs at half the frequency of the DDR PHY
45  * (i.e. the rules for MEMC_FREQ_RATIO=2 from the Zynq-documentation apply).
46  *
47  * Known limitations
48  * =================
49  * In the current state, the following features are not fully supported and
50  * a number of simplifying assumptions have been made:
51  *   1) Only DDR3 support is implemented, as our test platform (the A80-Q7
52  *      module) is designed to accommodate DDR3/DDR3L.
53  *   2) Only 2T-mode has been implemented and tested.
54  *   3) The controller supports two different clocking strategies (PLL6 can
55  *      either be 2*CK or CK/2)... we only support the 2*CK clock at this
56  *      time and haven't verified whether the alternative clocking strategy
57  *      works.  If you are interested in porting this over/testing this,
58  *      please refer to cases where bit 0 of 'dram_tpr8' is tested in the
59  *      original code from Allwinner.
60  *   4) Support for 2 ranks per controller is not implemented (as we don't
61  *      have the hardware to test it).
62  *
63  * Future directions
64  * =================
65  * The driver should be driven from a device-tree based configuration that
66  * can dynamically provide the necessary timing parameters (i.e. target
67  * frequency and speed-bin information)---the data structures used in the
68  * calculation of the timing parameters are already designed to capture
69  * similar information as the device tree would provide.
70  *
71  * To enable a device-tree based configuration of the sun9i platform, we
72  * will need to enable CONFIG_TPL and bootstrap in 3 stages: initially
73  * into SRAM A1 (40KB) and next into SRAM A2 (160KB)---which would be the
74  * stage to initialise the platform via the device-tree---before having
75  * the full U-Boot run from DDR.
76  */
77
78 /*
79  * A number of DDR3 timings are given as "the greater of a fixed number of
80  * clock cycles (CK) or nanoseconds.  We express these using a structure
81  * that holds a cycle count and a duration in picoseconds (so we can model
82  * sub-ns timings, such as 7.5ns without losing precision or resorting to
83  * rounding up early.
84  */
85 struct dram_sun9i_timing {
86         u32 ck;
87         u32 ps;
88 };
89
90 /* */
91 struct dram_sun9i_cl_cwl_timing {
92         u32 CL;
93         u32 CWL;
94         u32 tCKmin;  /* in ps */
95         u32 tCKmax;  /* in ps */
96 };
97
98 struct dram_sun9i_para {
99         u32 dram_type;
100
101         u8 bus_width;
102         u8 chan;
103         u8 rank;
104         u8 rows;
105         u16 page_size;
106
107         /* Timing information for each speed-bin */
108         struct dram_sun9i_cl_cwl_timing *cl_cwl_table;
109         u32 cl_cwl_numentries;
110
111         /*
112          * For the timings, we try to keep the order and grouping used in
113          * JEDEC Standard No. 79-3F
114          */
115
116         /* timings */
117         u32 tREFI; /* in ns */
118         u32 tRFC;  /* in ns */
119
120         u32 tRAS;  /* in ps */
121
122         /* command and address timing */
123         u32 tDLLK; /* in nCK */
124         struct dram_sun9i_timing tRTP;
125         struct dram_sun9i_timing tWTR;
126         u32 tWR;   /* in nCK */
127         u32 tMRD;  /* in nCK */
128         struct dram_sun9i_timing tMOD;
129         u32 tRCD;  /* in ps */
130         u32 tRP;   /* in ps */
131         u32 tRC;   /* in ps */
132         u32 tCCD;  /* in nCK */
133         struct dram_sun9i_timing tRRD;
134         u32 tFAW;  /* in ps */
135
136         /* calibration timing */
137         /* struct dram_sun9i_timing tZQinit; */
138         struct dram_sun9i_timing tZQoper;
139         struct dram_sun9i_timing tZQCS;
140
141         /* reset timing */
142         /* struct dram_sun9i_timing tXPR; */
143
144         /* self-refresh timings */
145         struct dram_sun9i_timing tXS;
146         u32 tXSDLL; /* in nCK */
147         /* struct dram_sun9i_timing tCKESR; */
148         struct dram_sun9i_timing tCKSRE;
149         struct dram_sun9i_timing tCKSRX;
150
151         /* power-down timings */
152         struct dram_sun9i_timing tXP;
153         struct dram_sun9i_timing tXPDLL;
154         struct dram_sun9i_timing tCKE;
155
156         /* write leveling timings */
157         u32 tWLMRD;    /* min, in nCK */
158         /* u32 tWLDQSEN;  min, in nCK */
159         u32 tWLO;      /* max, in ns */
160         /* u32 tWLOE;     max, in ns */
161
162         /* u32 tCKDPX;    in nCK */
163         /* u32 tCKCSX;    in nCK */
164 };
165
/* Forward declaration; the definition follows mctl_ctl_sched_init(). */
static void mctl_sys_init(void);
167
/*
 * Field encoders and flag bits for the value written to the controller's
 * 'sched' register.
 *
 * The macro argument is parenthesised so that compound expressions (for
 * example 'a | b') are masked as a whole, and the mask is unsigned so
 * that shifting an 8-bit field up to bit 24 cannot overflow a signed int.
 */
#define SCHED_RDWR_IDLE_GAP(n)            (((n) & 0xffU) << 24)
#define SCHED_GO2CRITICAL_HYSTERESIS(n)   (((n) & 0xffU) << 16)
#define SCHED_LPR_NUM_ENTRIES(n)          (((n) & 0xffU) <<  8)
#define SCHED_PAGECLOSE                   (1 << 2)
#define SCHED_PREFER_WRITE                (1 << 1)
#define SCHED_FORCE_LOW_PRI_N             (1 << 0)

/* Scheduler configuration applied to each DRAM controller instance. */
#define SCHED_CONFIG            (SCHED_RDWR_IDLE_GAP(0xf) | \
				 SCHED_GO2CRITICAL_HYSTERESIS(0x80) | \
				 SCHED_LPR_NUM_ENTRIES(0x20) | \
				 SCHED_FORCE_LOW_PRI_N)

/* Values written to the perfhpr0/1, perflpr0/1 and perfwr0/1 registers. */
#define PERFHPR0_CONFIG                   0x0000001f
#define PERFHPR1_CONFIG                   0x1f00001f
#define PERFLPR0_CONFIG                   0x000000ff
#define PERFLPR1_CONFIG                   0x0f0000ff
#define PERFWR0_CONFIG                    0x000000ff
#define PERFWR1_CONFIG                    0x0f0001ff
185
186 static void mctl_ctl_sched_init(unsigned long  base)
187 {
188         struct sunxi_mctl_ctl_reg *mctl_ctl =
189                 (struct sunxi_mctl_ctl_reg *)base;
190
191         /* Needs to be done before the global clk enable... */
192         writel(SCHED_CONFIG, &mctl_ctl->sched);
193         writel(PERFHPR0_CONFIG, &mctl_ctl->perfhpr0);
194         writel(PERFHPR1_CONFIG, &mctl_ctl->perfhpr1);
195         writel(PERFLPR0_CONFIG, &mctl_ctl->perflpr0);
196         writel(PERFLPR1_CONFIG, &mctl_ctl->perflpr1);
197         writel(PERFWR0_CONFIG, &mctl_ctl->perfwr0);
198         writel(PERFWR1_CONFIG, &mctl_ctl->perfwr1);
199 }
200
201 static void mctl_sys_init(void)
202 {
203         struct sunxi_ccm_reg * const ccm =
204                 (struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
205         struct sunxi_mctl_com_reg * const mctl_com =
206                 (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
207
208         debug("Setting PLL6 to %d\n", DRAM_CLK * 2);
209         clock_set_pll6(DRAM_CLK * 2);
210
211         /* Original dram init code which may come in handy later
212         ********************************************************
213         clock_set_pll6(use_2channelPLL ? (DRAM_CLK * 2) :
214                                          (DRAM_CLK / 2), false);
215
216         if ((para->dram_clk <= 400)|((para->dram_tpr8 & 0x1)==0)) {
217                  * PLL6 should be 2*CK *
218                  * ccm_setup_pll6_ddr_clk(PLL6_DDR_CLK); *
219                 ccm_setup_pll6_ddr_clk((1000000 * (para->dram_clk) * 2), 0);
220         } else {
221                  * PLL6 should be CK/2 *
222                 ccm_setup_pll6_ddr_clk((1000000 * (para->dram_clk) / 2), 1);
223         }
224
225         if (para->dram_tpr13 & (0xf<<18)) {
226                  *
227                  * bit21:bit18=0001:pll swing 0.4
228                  * bit21:bit18=0010:pll swing 0.3
229                  * bit21:bit18=0100:pll swing 0.2
230                  * bit21:bit18=1000:pll swing 0.1
231                  *
232                 dram_dbg("DRAM fre extend open !\n");
233                 reg_val=mctl_read_w(CCM_PLL6_DDR_REG);
234                 reg_val&=(0x1<<16);
235                 reg_val=reg_val>>16;
236
237                 if(para->dram_tpr13 & (0x1<<18))
238                 {
239                         mctl_write_w(CCM_PLL_BASE + 0x114,
240                                 (0x3333U|(0x3<<17)|(reg_val<<19)|(0x120U<<20)|
241                                 (0x2U<<29)|(0x1U<<31)));
242                 }
243                 else if(para->dram_tpr13 & (0x1<<19))
244                 {
245                         mctl_write_w(CCM_PLL_BASE + 0x114,
246                                 (0x6666U|(0x3U<<17)|(reg_val<<19)|(0xD8U<<20)|
247                                 (0x2U<<29)|(0x1U<<31)));
248                 }
249                 else if(para->dram_tpr13 & (0x1<<20))
250                 {
251                         mctl_write_w(CCM_PLL_BASE + 0x114,
252                                 (0x9999U|(0x3U<<17)|(reg_val<<19)|(0x90U<<20)|
253                                 (0x2U<<29)|(0x1U<<31)));
254                 }
255                 else if(para->dram_tpr13 & (0x1<<21))
256                 {
257                         mctl_write_w(CCM_PLL_BASE + 0x114,
258                                 (0xccccU|(0x3U<<17)|(reg_val<<19)|(0x48U<<20)|
259                                 (0x2U<<29)|(0x1U<<31)));
260                 }
261
262                 //frequency extend open
263                 reg_val = mctl_read_w(CCM_PLL6_DDR_REG);
264                 reg_val |= ((0x1<<24)|(0x1<<30));
265                 mctl_write_w(CCM_PLL6_DDR_REG, reg_val);
266
267
268                 while(mctl_read_w(CCM_PLL6_DDR_REG) & (0x1<<30));
269         }
270
271         aw_delay(0x20000);      //make some delay
272         ********************************************************
273         */
274
275         /* assert mctl reset */
276         clrbits_le32(&ccm->ahb_reset0_cfg, 1 << AHB_RESET_OFFSET_MCTL);
277         /* stop mctl clock */
278         clrbits_le32(&ccm->ahb_gate0, 1 << AHB_GATE_OFFSET_MCTL);
279
280         sdelay(2000);
281
282         /* deassert mctl reset */
283         setbits_le32(&ccm->ahb_reset0_cfg, 1 << AHB_RESET_OFFSET_MCTL);
284         /* enable mctl clock */
285         setbits_le32(&ccm->ahb_gate0, 1 << AHB_GATE_OFFSET_MCTL);
286
287         /* set up the transactions scheduling before enabling the global clk */
288         mctl_ctl_sched_init(SUNXI_DRAM_CTL0_BASE);
289         mctl_ctl_sched_init(SUNXI_DRAM_CTL1_BASE);
290         sdelay(1000);
291
292         debug("2\n");
293
294         /* (3 << 12): PLL_DDR */
295         writel((3 << 12) | (1 << 16), &ccm->dram_clk_cfg);
296         do {
297                 debug("Waiting for DRAM_CLK_CFG\n");
298                 sdelay(10000);
299         } while (readl(&ccm->dram_clk_cfg) & (1 << 16));
300         setbits_le32(&ccm->dram_clk_cfg, (1 << 31));
301
302         /* TODO: we only support the common case ... i.e. 2*CK */
303         setbits_le32(&mctl_com->ccr, (1 << 14) | (1 << 30));
304         writel(2, &mctl_com->rmcr); /* controller clock is PLL6/4 */
305
306         sdelay(2000);
307
308         /* Original dram init code which may come in handy later
309         ********************************************************
310         if ((para->dram_clk <= 400) | ((para->dram_tpr8 & 0x1) == 0)) {
311                  * PLL6 should be 2*CK *
312                  * gating 2 channel pll *
313                 reg_val = mctl_read_w(MC_CCR);
314                 reg_val |= ((0x1 << 14) | (0x1U << 30));
315                 mctl_write_w(MC_CCR, reg_val);
316                 mctl_write_w(MC_RMCR, 0x2); * controller clock use pll6/4 *
317         } else {
318                  * enable 2 channel pll *
319                 reg_val = mctl_read_w(MC_CCR);
320                 reg_val &= ~((0x1 << 14) | (0x1U << 30));
321                 mctl_write_w(MC_CCR, reg_val);
322                 mctl_write_w(MC_RMCR, 0x0); * controller clock use pll6 *
323         }
324
325         reg_val = mctl_read_w(MC_CCR);
326         reg_val &= ~((0x1<<15)|(0x1U<<31));
327         mctl_write_w(MC_CCR, reg_val);
328         aw_delay(20);
329         //aw_delay(0x10);
330         ********************************************************
331         */
332
333         clrbits_le32(&mctl_com->ccr, MCTL_CCR_CH0_CLK_EN | MCTL_CCR_CH1_CLK_EN);
334         sdelay(1000);
335
336         setbits_le32(&mctl_com->ccr, MCTL_CCR_CH0_CLK_EN);
337         /* TODO if (para->chan == 2) */
338         setbits_le32(&mctl_com->ccr, MCTL_CCR_CH1_CLK_EN);
339 }
340
341 static void mctl_com_init(struct dram_sun9i_para *para)
342 {
343         struct sunxi_mctl_com_reg * const mctl_com =
344                 (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
345
346         /* TODO: hard-wired for DDR3 now */
347         writel(((para->chan == 2) ? MCTL_CR_CHANNEL_DUAL :
348                                     MCTL_CR_CHANNEL_SINGLE)
349                | MCTL_CR_DRAMTYPE_DDR3 | MCTL_CR_BANK(1)
350                | MCTL_CR_ROW(para->rows)
351                | ((para->bus_width == 32) ? MCTL_CR_BUSW32 : MCTL_CR_BUSW16)
352                | MCTL_CR_PAGE_SIZE(para->page_size) | MCTL_CR_RANK(para->rank),
353                &mctl_com->cr);
354
355         debug("CR: %d\n", readl(&mctl_com->cr));
356 }
357
358 static u32 mctl_channel_init(u32 ch_index, struct dram_sun9i_para *para)
359 {
360         struct sunxi_mctl_ctl_reg *mctl_ctl;
361         struct sunxi_mctl_phy_reg *mctl_phy;
362
363         u32 CL = 0;
364         u32 CWL = 0;
365         u16 mr[4] = { 0, };
366
367 #define PS2CYCLES_FLOOR(n)    ((n * CONFIG_DRAM_CLK) / 1000000)
368 #define PS2CYCLES_ROUNDUP(n)  ((n * CONFIG_DRAM_CLK + 999999) / 1000000)
369 #define NS2CYCLES_FLOOR(n)    ((n * CONFIG_DRAM_CLK) / 1000)
370 #define NS2CYCLES_ROUNDUP(n)  ((n * CONFIG_DRAM_CLK + 999) / 1000)
371 #define MAX(a, b)             ((a) > (b) ? (a) : (b))
372
373         /*
374          * Convert the values to cycle counts (nCK) from what is provided
375          * by the definition of each speed bin.
376          */
377         /* const u32 tREFI = NS2CYCLES_FLOOR(para->tREFI); */
378         const u32 tREFI = NS2CYCLES_FLOOR(para->tREFI);
379         const u32 tRFC  = NS2CYCLES_ROUNDUP(para->tRFC);
380         const u32 tRCD  = PS2CYCLES_ROUNDUP(para->tRCD);
381         const u32 tRP   = PS2CYCLES_ROUNDUP(para->tRP);
382         const u32 tRC   = PS2CYCLES_ROUNDUP(para->tRC);
383         const u32 tRAS  = PS2CYCLES_ROUNDUP(para->tRAS);
384
385         /* command and address timing */
386         const u32 tDLLK = para->tDLLK;
387         const u32 tRTP  = MAX(para->tRTP.ck, PS2CYCLES_ROUNDUP(para->tRTP.ps));
388         const u32 tWTR  = MAX(para->tWTR.ck, PS2CYCLES_ROUNDUP(para->tWTR.ps));
389         const u32 tWR   = NS2CYCLES_FLOOR(para->tWR);
390         const u32 tMRD  = para->tMRD;
391         const u32 tMOD  = MAX(para->tMOD.ck, PS2CYCLES_ROUNDUP(para->tMOD.ps));
392         const u32 tCCD  = para->tCCD;
393         const u32 tRRD  = MAX(para->tRRD.ck, PS2CYCLES_ROUNDUP(para->tRRD.ps));
394         const u32 tFAW  = PS2CYCLES_ROUNDUP(para->tFAW);
395
396         /* calibration timings */
397         /* const u32 tZQinit = MAX(para->tZQinit.ck,
398                                 PS2CYCLES_ROUNDUP(para->tZQinit.ps)); */
399         const u32 tZQoper = MAX(para->tZQoper.ck,
400                                 PS2CYCLES_ROUNDUP(para->tZQoper.ps));
401         const u32 tZQCS   = MAX(para->tZQCS.ck,
402                                 PS2CYCLES_ROUNDUP(para->tZQCS.ps));
403
404         /* reset timing */
405         /* const u32 tXPR  = MAX(para->tXPR.ck,
406                                 PS2CYCLES_ROUNDUP(para->tXPR.ps)); */
407
408         /* power-down timings */
409         const u32 tXP    = MAX(para->tXP.ck, PS2CYCLES_ROUNDUP(para->tXP.ps));
410         const u32 tXPDLL = MAX(para->tXPDLL.ck,
411                                PS2CYCLES_ROUNDUP(para->tXPDLL.ps));
412         const u32 tCKE   = MAX(para->tCKE.ck, PS2CYCLES_ROUNDUP(para->tCKE.ps));
413
414         /*
415          * self-refresh timings (keep below power-down timings, as tCKESR
416          * needs to be calculated based on the nCK value of tCKE)
417          */
418         const u32 tXS    = MAX(para->tXS.ck, PS2CYCLES_ROUNDUP(para->tXS.ps));
419         const u32 tXSDLL = para->tXSDLL;
420         const u32 tCKSRE = MAX(para->tCKSRE.ck,
421                                PS2CYCLES_ROUNDUP(para->tCKSRE.ps));
422         const u32 tCKESR = tCKE + 1;
423         const u32 tCKSRX = MAX(para->tCKSRX.ck,
424                                PS2CYCLES_ROUNDUP(para->tCKSRX.ps));
425
426         /* write leveling timings */
427         const u32 tWLMRD = para->tWLMRD;
428         /* const u32 tWLDQSEN = para->tWLDQSEN; */
429         const u32 tWLO = PS2CYCLES_FLOOR(para->tWLO);
430         /* const u32 tWLOE = PS2CYCLES_FLOOR(para->tWLOE); */
431
432         const u32 tRASmax = tREFI * 9;
433         int i;
434
435         for (i = 0; i < para->cl_cwl_numentries; ++i) {
436                 const u32 tCK = 1000000 / CONFIG_DRAM_CLK;
437
438                 if ((para->cl_cwl_table[i].tCKmin <= tCK) &&
439                     (tCK < para->cl_cwl_table[i].tCKmax)) {
440                         CL = para->cl_cwl_table[i].CL;
441                         CWL = para->cl_cwl_table[i].CWL;
442
443                         debug("found CL/CWL: CL = %d, CWL = %d\n", CL, CWL);
444                         break;
445                 }
446         }
447
448         if ((CL == 0) && (CWL == 0)) {
449                 printf("failed to find valid CL/CWL for operating point %d MHz\n",
450                        CONFIG_DRAM_CLK);
451                 return 0;
452         }
453
454         if (ch_index == 0) {
455                 mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
456                 mctl_phy = (struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE;
457         } else {
458                 mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL1_BASE;
459                 mctl_phy = (struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY1_BASE;
460         }
461
462         if (para->dram_type == DRAM_TYPE_DDR3) {
463                 mr[0] = DDR3_MR0_PPD_FAST_EXIT | DDR3_MR0_WR(tWR) |
464                         DDR3_MR0_CL(CL);
465                 mr[1] = DDR3_MR1_RTT120OHM;
466                 mr[2] = DDR3_MR2_TWL(CWL);
467                 mr[3] = 0;
468
469                 /*
470                  * DRAM3 initialisation requires holding CKE LOW for
471                  * at least 500us prior to starting the initialisation
472                  * sequence and at least 10ns after driving CKE HIGH
473                  * before the initialisation sequence may be started).
474                  *
475                  * Refer to Micron document "TN-41-07: DDR3 Power-Up,
476                  * Initialization, and Reset DDR3 Initialization
477                  * Routine" for details).
478                  */
479                 writel(MCTL_INIT0_POST_CKE_x1024(1) |
480                        MCTL_INIT0_PRE_CKE_x1024(
481                             (500 * CONFIG_DRAM_CLK + 1023) / 1024), /* 500us */
482                        &mctl_ctl->init[0]);
483                 writel(MCTL_INIT1_DRAM_RSTN_x1024(1),
484                        &mctl_ctl->init[1]);
485                 /* INIT2 is not used for DDR3 */
486                 writel(MCTL_INIT3_MR(mr[0]) | MCTL_INIT3_EMR(mr[1]),
487                        &mctl_ctl->init[3]);
488                 writel(MCTL_INIT4_EMR2(mr[2]) | MCTL_INIT4_EMR3(mr[3]),
489                        &mctl_ctl->init[4]);
490                 writel(MCTL_INIT5_DEV_ZQINIT_x32(512 / 32), /* 512 cycles */
491                        &mctl_ctl->init[5]);
492         } else {
493                 /* !!! UNTESTED !!! */
494                 /*
495                  * LPDDR2 and/or LPDDR3 require a 200us minimum delay
496                  * after driving CKE HIGH in the initialisation sequence.
497                  */
498                 writel(MCTL_INIT0_POST_CKE_x1024(
499                                 (200 * CONFIG_DRAM_CLK + 1023) / 1024),
500                        &mctl_ctl->init[0]);
501                 writel(MCTL_INIT1_DRAM_RSTN_x1024(1),
502                        &mctl_ctl->init[1]);
503                 writel(MCTL_INIT2_IDLE_AFTER_RESET_x32(
504                                 (CONFIG_DRAM_CLK + 31) / 32) /* 1us */
505                        | MCTL_INIT2_MIN_STABLE_CLOCK_x1(5),  /* 5 cycles */
506                        &mctl_ctl->init[2]);
507                 writel(MCTL_INIT3_MR(mr[1]) | MCTL_INIT3_EMR(mr[2]),
508                        &mctl_ctl->init[3]);
509                 writel(MCTL_INIT4_EMR2(mr[3]),
510                        &mctl_ctl->init[4]);
511                 writel(MCTL_INIT5_DEV_ZQINIT_x32(
512                                 (CONFIG_DRAM_CLK + 31) / 32) /* 1us */
513                        | MCTL_INIT5_MAX_AUTO_INIT_x1024(
514                                 (10 * CONFIG_DRAM_CLK + 1023) / 1024),
515                        &mctl_ctl->init[5]);
516         }
517
518         /* (DDR3) We always use a burst-length of 8. */
519 #define MCTL_BL               8
520         /* wr2pre: WL + BL/2 + tWR */
521 #define WR2PRE           (MCTL_BL/2 + CWL + tWTR)
522         /* wr2rd = CWL + BL/2 + tWTR */
523 #define WR2RD            (MCTL_BL/2 + CWL + tWTR)
524         /*
525          * rd2wr = RL + BL/2 + 2 - WL (for DDR3)
526          * rd2wr = RL + BL/2 + RU(tDQSCKmax/tCK) + 1 - WL (for LPDDR2/LPDDR3)
527          */
528 #define RD2WR            (CL + MCTL_BL/2 + 2 - CWL)
529 #define MCTL_PHY_TRTW        0
530 #define MCTL_PHY_TRTODT      0
531
532 #define MCTL_DIV2(n)         ((n + 1)/2)
533 #define MCTL_DIV32(n)        (n/32)
534 #define MCTL_DIV1024(n)      (n/1024)
535
536         writel((MCTL_DIV2(WR2PRE) << 24) | (MCTL_DIV2(tFAW) << 16) |
537                (MCTL_DIV1024(tRASmax) << 8) | (MCTL_DIV2(tRAS) << 0),
538                &mctl_ctl->dramtmg[0]);
539         writel((MCTL_DIV2(tXP) << 16) | (MCTL_DIV2(tRTP) << 8) |
540                (MCTL_DIV2(tRC) << 0),
541                &mctl_ctl->dramtmg[1]);
542         writel((MCTL_DIV2(CWL) << 24) | (MCTL_DIV2(CL) << 16) |
543                (MCTL_DIV2(RD2WR) << 8) | (MCTL_DIV2(WR2RD) << 0),
544                &mctl_ctl->dramtmg[2]);
545         /*
546          * Note: tMRW is located at bit 16 (and up) in DRAMTMG3...
547          * this is only relevant for LPDDR2/LPDDR3
548          */
549         writel((MCTL_DIV2(tMRD) << 12) | (MCTL_DIV2(tMOD) << 0),
550                &mctl_ctl->dramtmg[3]);
551         writel((MCTL_DIV2(tRCD) << 24) | (MCTL_DIV2(tCCD) << 16) |
552                (MCTL_DIV2(tRRD) << 8) | (MCTL_DIV2(tRP) << 0),
553                &mctl_ctl->dramtmg[4]);
554         writel((MCTL_DIV2(tCKSRX) << 24) | (MCTL_DIV2(tCKSRE) << 16) |
555                (MCTL_DIV2(tCKESR) << 8) | (MCTL_DIV2(tCKE) << 0),
556                &mctl_ctl->dramtmg[5]);
557
558         /* These timings are relevant for LPDDR2/LPDDR3 only */
559         /* writel((MCTL_TCKDPDE << 24) | (MCTL_TCKDPX << 16) |
560                (MCTL_TCKCSX << 0), &mctl_ctl->dramtmg[6]); */
561
562         /* printf("DRAMTMG7 reset value: 0x%x\n",
563                 readl(&mctl_ctl->dramtmg[7])); */
564         /* DRAMTMG7 reset value: 0x202 */
565         /* DRAMTMG7 should contain t_ckpde and t_ckpdx: check reset values!!! */
566         /* printf("DRAMTMG8 reset value: 0x%x\n",
567                 readl(&mctl_ctl->dramtmg[8])); */
568         /* DRAMTMG8 reset value: 0x44 */
569
570         writel((MCTL_DIV32(tXSDLL) << 0), &mctl_ctl->dramtmg[8]);
571
572         writel((MCTL_DIV32(tREFI) << 16) | (MCTL_DIV2(tRFC) << 0),
573                &mctl_ctl->rfshtmg);
574
575         if (para->dram_type == DRAM_TYPE_DDR3) {
576                 writel((2 << 24) | ((MCTL_DIV2(CL) - 2) << 16) |
577                        (1 << 8) | ((MCTL_DIV2(CWL) - 2) << 0),
578                         &mctl_ctl->dfitmg[0]);
579         } else {
580                 /* TODO */
581         }
582
583         /* TODO: handle the case of the write latency domain going to 0 ... */
584
585         /*
586          * Disable dfi_init_complete_en (the triggering of the SDRAM
587          * initialisation when the PHY initialisation completes).
588          */
589         clrbits_le32(&mctl_ctl->dfimisc, MCTL_DFIMISC_DFI_INIT_COMPLETE_EN);
590         /* Disable the automatic generation of DLL calibration requests */
591         setbits_le32(&mctl_ctl->dfiupd[0], MCTL_DFIUPD0_DIS_AUTO_CTRLUPD);
592
593         /* A80-Q7: 2T, 1 rank, DDR3, full-32bit-DQ */
594         /* TODO: make 2T and BUSWIDTH configurable  */
595         writel(MCTL_MSTR_DEVICETYPE(para->dram_type) |
596                MCTL_MSTR_BURSTLENGTH(para->dram_type) |
597                MCTL_MSTR_ACTIVERANKS(para->rank) |
598                MCTL_MSTR_2TMODE | MCTL_MSTR_BUSWIDTH32,
599                &mctl_ctl->mstr);
600
601         if (para->dram_type == DRAM_TYPE_DDR3) {
602                 writel(MCTL_ZQCTRL0_TZQCL(MCTL_DIV2(tZQoper)) |
603                        (MCTL_DIV2(tZQCS)), &mctl_ctl->zqctrl[0]);
604                 /*
605                  * TODO: is the following really necessary as the bottom
606                  * half should already be 0x100 and the upper half should
607                  * be ignored for a DDR3 device???
608                  */
609                 writel(MCTL_ZQCTRL1_TZQSI_x1024(0x100),
610                        &mctl_ctl->zqctrl[1]);
611         } else {
612                 writel(MCTL_ZQCTRL0_TZQCL(0x200) | MCTL_ZQCTRL0_TZQCS(0x40),
613                        &mctl_ctl->zqctrl[0]);
614                 writel(MCTL_ZQCTRL1_TZQRESET(0x28) |
615                        MCTL_ZQCTRL1_TZQSI_x1024(0x100),
616                        &mctl_ctl->zqctrl[1]);
617         }
618
619         /* Assert dfi_init_complete signal */
620         setbits_le32(&mctl_ctl->dfimisc, MCTL_DFIMISC_DFI_INIT_COMPLETE_EN);
621         /* Disable auto-refresh */
622         setbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
623
624         /* PHY initialisation */
625
626         /* TODO: make 2T and 8-bank mode configurable  */
627         writel(MCTL_PHY_DCR_BYTEMASK | MCTL_PHY_DCR_2TMODE |
628                MCTL_PHY_DCR_DDR8BNK | MCTL_PHY_DRAMMODE_DDR3,
629                &mctl_phy->dcr);
630
631         /* For LPDDR2 or LPDDR3, set DQSGX to 0 before training. */
632         if (para->dram_type != DRAM_TYPE_DDR3)
633                 clrbits_le32(&mctl_phy->dsgcr, (3 << 6));
634
635         writel(mr[0], &mctl_phy->mr0);
636         writel(mr[1], &mctl_phy->mr1);
637         writel(mr[2], &mctl_phy->mr2);
638         writel(mr[3], &mctl_phy->mr3);
639
640         /*
641          * The DFI PHY is running at full rate. We thus use the actual
642          * timings in clock cycles here.
643          */
644         writel((tRC << 26) | (tRRD << 22) | (tRAS << 16) |
645                (tRCD << 12) | (tRP << 8) | (tWTR << 4) | (tRTP << 0),
646                 &mctl_phy->dtpr[0]);
647         writel((tMRD << 0) | ((tMOD - 12) << 2) | (tFAW << 5) |
648                (tRFC << 11) | (tWLMRD << 20) | (tWLO << 26),
649                &mctl_phy->dtpr[1]);
650         writel((tXS << 0) | (MAX(tXP, tXPDLL) << 10) |
651                (tCKE << 15) | (tDLLK << 19) |
652                (MCTL_PHY_TRTODT << 29) | (MCTL_PHY_TRTW << 30) |
653                (((tCCD - 4) & 0x1) << 31),
654                &mctl_phy->dtpr[2]);
655
656         /* tDQSCK and tDQSCKmax are used LPDDR2/LPDDR3 */
657         /* writel((tDQSCK << 0) | (tDQSCKMAX << 3), &mctl_phy->dtpr[3]); */
658
659         /*
660          * We use the same values used by Allwinner's Boot0 for the PTR
661          * (PHY timing register) configuration that is tied to the PHY
662          * implementation.
663          */
664         writel(0x42C21590, &mctl_phy->ptr[0]);
665         writel(0xD05612C0, &mctl_phy->ptr[1]);
666         if (para->dram_type == DRAM_TYPE_DDR3) {
667                 const unsigned int tdinit0 = 500 * CONFIG_DRAM_CLK; /* 500us */
668                 const unsigned int tdinit1 = (360 * CONFIG_DRAM_CLK + 999) /
669                         1000; /* 360ns */
670                 const unsigned int tdinit2 = 200 * CONFIG_DRAM_CLK; /* 200us */
671                 const unsigned int tdinit3 = CONFIG_DRAM_CLK; /* 1us */
672
673                 writel((tdinit1 << 20) | tdinit0, &mctl_phy->ptr[3]);
674                 writel((tdinit3 << 18) | tdinit2, &mctl_phy->ptr[4]);
675         } else {
676                 /* LPDDR2 or LPDDR3 */
677                 const unsigned int tdinit0 = (100 * CONFIG_DRAM_CLK + 999) /
678                         1000; /* 100ns */
679                 const unsigned int tdinit1 = 200 * CONFIG_DRAM_CLK; /* 200us */
680                 const unsigned int tdinit2 = 22 * CONFIG_DRAM_CLK; /* 11us */
681                 const unsigned int tdinit3 = 2 * CONFIG_DRAM_CLK; /* 2us */
682
683                 writel((tdinit1 << 20) | tdinit0, &mctl_phy->ptr[3]);
684                 writel((tdinit3 << 18) | tdinit2, &mctl_phy->ptr[4]);
685         }
686
687         /* TEST ME */
688         writel(0x00203131, &mctl_phy->acmdlr);
689
690         /* TODO: can we enable this for 2 ranks, even when we don't know yet */
691         writel(MCTL_DTCR_DEFAULT | MCTL_DTCR_RANKEN(para->rank),
692                &mctl_phy->dtcr);
693
694         /* TODO: half width */
695         debug("DX2GCR0 reset: 0x%x\n", readl(&mctl_phy->dx[2].gcr[0]));
696         writel(0x7C000285, &mctl_phy->dx[2].gcr[0]);
697         writel(0x7C000285, &mctl_phy->dx[3].gcr[0]);
698
699         clrsetbits_le32(&mctl_phy->zq[0].pr, 0xff,
700                         (CONFIG_DRAM_ZQ >>  0) & 0xff);  /* CK/CA */
701         clrsetbits_le32(&mctl_phy->zq[1].pr, 0xff,
702                         (CONFIG_DRAM_ZQ >>  8) & 0xff);  /* DX0/DX1 */
703         clrsetbits_le32(&mctl_phy->zq[2].pr, 0xff,
704                         (CONFIG_DRAM_ZQ >> 16) & 0xff);  /* DX2/DX3 */
705
706         /* TODO: make configurable & implement non-ODT path */
707         if (1) {
708                 int lane;
709                 for (lane = 0; lane < 4; ++lane) {
710                         clrbits_le32(&mctl_phy->dx[lane].gcr[2], 0xffff);
711                         clrbits_le32(&mctl_phy->dx[lane].gcr[3],
712                                      (0x3<<12) | (0x3<<4));
713                 }
714         } else {
715                 /* TODO: check */
716                 int lane;
717                 for (lane = 0; lane < 4; ++lane) {
718                         clrsetbits_le32(&mctl_phy->dx[lane].gcr[2], 0xffff,
719                                         0xaaaa);
720                         if (para->dram_type == DRAM_TYPE_DDR3)
721                                 setbits_le32(&mctl_phy->dx[lane].gcr[3],
722                                              (0x3<<12) | (0x3<<4));
723                         else
724                                 setbits_le32(&mctl_phy->dx[lane].gcr[3],
725                                              0x00000012);
726                 }
727         }
728
729         writel(0x04058D02, &mctl_phy->zq[0].cr); /* CK/CA */
730         writel(0x04058D02, &mctl_phy->zq[1].cr); /* DX0/DX1 */
731         writel(0x04058D02, &mctl_phy->zq[2].cr); /* DX2/DX3 */
732
733         /* Disable auto-refresh prior to data training */
734         setbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
735
736         setbits_le32(&mctl_phy->dsgcr, 0xf << 24); /* unclear what this is... */
737         /* TODO: IODDRM (IO DDR-MODE) for DDR3L */
738         clrsetbits_le32(&mctl_phy->pgcr[1],
739                         MCTL_PGCR1_ZCKSEL_MASK,
740                         MCTL_PGCR1_IODDRM_DDR3 | MCTL_PGCR1_INHVT_EN);
741
742         setbits_le32(&mctl_phy->pllcr, 0x3 << 19); /* PLL frequency select */
743         /* TODO: single-channel PLL mode??? missing */
744         setbits_le32(&mctl_phy->pllcr,
745                      MCTL_PLLGCR_PLL_BYPASS | MCTL_PLLGCR_PLL_POWERDOWN);
746         /* setbits_le32(&mctl_phy->pir, MCTL_PIR_PLL_BYPASS); included below */
747
748         /* Disable VT compensation */
749         clrbits_le32(&mctl_phy->pgcr[0], 0x3f);
750
751         /* TODO: "other" PLL mode ... 0x20000 seems to be the PLL Bypass */
752         if (para->dram_type == DRAM_TYPE_DDR3)
753                 clrsetbits_le32(&mctl_phy->pir, MCTL_PIR_MASK, 0x20df3);
754         else
755                 clrsetbits_le32(&mctl_phy->pir, MCTL_PIR_MASK, 0x2c573);
756
757         sdelay(10000); /* XXX necessary? */
758
759         /* Wait for the INIT bit to clear itself... */
760         while ((readl(&mctl_phy->pir) & MCTL_PIR_INIT) != MCTL_PIR_INIT) {
761                 /* not done yet -- keep spinning */
762                 debug("MCTL_PIR_INIT not set\n");
763                 sdelay(1000);
764                 /* TODO: implement timeout */
765         }
766
767         /* TODO: not used --- there's a "2rank debug" section here */
768
769         /* Original dram init code which may come in handy later
770         ********************************************************
771          * LPDDR2 and LPDDR3 *
772         if ((para->dram_type) == 6 || (para->dram_type) == 7) {
773                 reg_val = mctl_read_w(P0_DSGCR + ch_offset);
774                 reg_val &= (~(0x3<<6));         * set DQSGX to 1 *
775                 reg_val |= (0x1<<6);            * dqs gate extend *
776                 mctl_write_w(P0_DSGCR + ch_offset, reg_val);
777                 dram_dbg("DQS Gate Extend Enable!\n", ch_index);
778         }
779
780          * Disable ZCAL after initial--for nand dma debug--20140330 by YSZ *
781         if (para->dram_tpr13 & (0x1<<31)) {
782                 reg_val = mctl_read_w(P0_ZQ0CR + ch_offset);
783                 reg_val |= (0x7<<11);
784                 mctl_write_w(P0_ZQ0CR + ch_offset, reg_val);
785         }
786         ********************************************************
787         */
788
789         /*
790          * TODO: more 2-rank support
791          * (setting the "dqs gate delay to average between 2 rank")
792          */
793
794         /* check if any errors are set */
795         if (readl(&mctl_phy->pgsr[0]) & MCTL_PGSR0_ERRORS) {
796                 debug("Channel %d unavailable!\n", ch_index);
797                 return 0;
798         } else{
799                 /* initial OK */
800                 debug("Channel %d OK!\n", ch_index);
801                 /* return 1; */
802         }
803
804         while ((readl(&mctl_ctl->stat) & 0x1) != 0x1) {
805                 debug("Waiting for INIT to be done (controller to come up into 'normal operating' mode\n");
806                 sdelay(100000);
807                 /* init not done */
808                 /* TODO: implement time-out */
809         }
810         debug("done\n");
811
812         /* "DDR is controller by contoller" */
813         clrbits_le32(&mctl_phy->pgcr[3], (1 << 25));
814
815         /* TODO: is the following necessary? */
816         debug("DFIMISC before writing 0: 0x%x\n", readl(&mctl_ctl->dfimisc));
817         writel(0, &mctl_ctl->dfimisc);
818
819         /* Enable auto-refresh */
820         clrbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
821
822         debug("channel_init complete\n");
823         return 1;
824 }
825
826 signed int DRAMC_get_dram_size(void)
827 {
828         struct sunxi_mctl_com_reg * const mctl_com =
829                 (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
830
831         unsigned int reg_val;
832         unsigned int dram_size;
833         unsigned int temp;
834
835         reg_val = readl(&mctl_com->cr);
836
837         temp = (reg_val >> 8) & 0xf;    /* page size code */
838         dram_size = (temp - 6);         /* (1 << dram_size) * 512Bytes */
839
840         temp = (reg_val >> 4) & 0xf;    /* row width code */
841         dram_size += (temp + 1);        /* (1 << dram_size) * 512Bytes */
842
843         temp = (reg_val >> 2) & 0x3;    /* bank number code */
844         dram_size += (temp + 2);        /* (1 << dram_size) * 512Bytes */
845
846         temp = reg_val & 0x3;           /* rank number code */
847         dram_size += temp;              /* (1 << dram_size) * 512Bytes */
848
849         temp = (reg_val >> 19) & 0x1;   /* channel number code */
850         dram_size += temp;              /* (1 << dram_size) * 512Bytes */
851
852         dram_size = dram_size - 11;     /* (1 << dram_size) MBytes */
853
854         return 1 << dram_size;
855 }
856
857 unsigned long sunxi_dram_init(void)
858 {
859         struct sunxi_mctl_com_reg * const mctl_com =
860                 (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
861
862         struct dram_sun9i_cl_cwl_timing cl_cwl[] = {
863                 { .CL =  5, .CWL = 5, .tCKmin = 3000, .tCKmax = 3300 },
864                 { .CL =  6, .CWL = 5, .tCKmin = 2500, .tCKmax = 3300 },
865                 { .CL =  8, .CWL = 6, .tCKmin = 1875, .tCKmax = 2500 },
866                 { .CL = 10, .CWL = 7, .tCKmin = 1500, .tCKmax = 1875 },
867                 { .CL = 11, .CWL = 8, .tCKmin = 1250, .tCKmax = 1500 }
868         };
869
870         /* Set initial parameters, these get modified by the autodetect code */
871         struct dram_sun9i_para para = {
872                 .dram_type = DRAM_TYPE_DDR3,
873                 .bus_width = 32,
874                 .chan = 2,
875                 .rank = 1,
876                 /* .rank = 2, */
877                 .page_size = 4096,
878                 /* .rows = 16, */
879                 .rows = 15,
880
881                 /* CL/CWL table for the speed bin */
882                 .cl_cwl_table = cl_cwl,
883                 .cl_cwl_numentries = sizeof(cl_cwl) /
884                         sizeof(struct dram_sun9i_cl_cwl_timing),
885
886                 /* timings */
887                 .tREFI = 7800,  /* 7.8us (up to 85 degC) */
888                 .tRFC  = 260,   /* 260ns for 4GBit devices */
889                                 /* 350ns @ 8GBit */
890
891                 .tRCD  = 13750,
892                 .tRP   = 13750,
893                 .tRC   = 48750,
894                 .tRAS  = 35000,
895
896                 .tDLLK = 512,
897                 .tRTP  = { .ck = 4, .ps = 7500 },
898                 .tWTR  = { .ck = 4, .ps = 7500 },
899                 .tWR   = 15,
900                 .tMRD  = 4,
901                 .tMOD  = { .ck = 12, .ps = 15000 },
902                 .tCCD  = 4,
903                 .tRRD  = { .ck = 4, .ps = 7500 },
904                 .tFAW  = 40,
905
906                 /* calibration timing */
907                 /* .tZQinit = { .ck = 512, .ps = 640000 }, */
908                 .tZQoper = { .ck = 256, .ps = 320000 },
909                 .tZQCS   = { .ck = 64,  .ps = 80000 },
910
911                 /* reset timing */
912                 /* .tXPR  = { .ck = 5, .ps = 10000 }, */
913
914                 /* self-refresh timings */
915                 .tXS  = { .ck = 5, .ps = 10000 },
916                 .tXSDLL = 512,
917                 .tCKSRE = { .ck = 5, .ps = 10000 },
918                 .tCKSRX = { .ck = 5, .ps = 10000 },
919
920                 /* power-down timings */
921                 .tXP = { .ck = 3, .ps = 6000 },
922                 .tXPDLL = { .ck = 10, .ps = 24000 },
923                 .tCKE = { .ck = 3, .ps = 5000 },
924
925                 /* write leveling timings */
926                 .tWLMRD = 40,
927                 /* .tWLDQSEN = 25, */
928                 .tWLO = 7500,
929                 /* .tWLOE = 2000, */
930         };
931
932         /*
933          * Disable A80 internal 240 ohm resistor.
934          *
935          * This code sequence is adapated from Allwinner's Boot0 (see
936          * https://github.com/allwinner-zh/bootloader.git), as there
937          * is no documentation for these two registers in the R_PRCM
938          * block.
939          */
940         setbits_le32(SUNXI_PRCM_BASE + 0x1e0, (0x3 << 8));
941         writel(0, SUNXI_PRCM_BASE + 0x1e8);
942
943         mctl_sys_init();
944
945         if (!mctl_channel_init(0, &para))
946                 return 0;
947
948         /* dual-channel */
949         if (!mctl_channel_init(1, &para)) {
950                 /* disable channel 1 */
951                 clrsetbits_le32(&mctl_com->cr, MCTL_CR_CHANNEL_MASK,
952                                 MCTL_CR_CHANNEL_SINGLE);
953                 /* disable channel 1 global clock */
954                 clrbits_le32(&mctl_com->cr, MCTL_CCR_CH1_CLK_EN);
955         }
956
957         mctl_com_init(&para);
958
959         /* return the proper RAM size */
960         return DRAMC_get_dram_size() << 20;
961 }