/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier:    BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include "sequencer.h"
#include "sequencer_auto.h"
#include "sequencer_auto_ac_init.h"
#include "sequencer_auto_inst_init.h"
#include "sequencer_defines.h"

static void scc_mgr_load_dqs_for_write_group(uint32_t write_group);

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
        (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);

static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
        (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);

static struct socfpga_sdr_reg_file *sdr_reg_file =
        (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;

static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
        (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);

static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
        (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;

static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
        (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);

static struct socfpga_data_mgr *data_mgr =
        (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;

static struct socfpga_sdr_ctrl *sdr_ctrl =
        (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D         1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS for the
 * check, which is based on the RTL-supplied value, or we dynamically compute
 * the value to use based on the dynamically-chosen calibration mode.
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
        STATIC_SKIP_DELAY_LOOPS)

/* calibration steps requested by the RTL */
uint16_t dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
 * instead of static, we use boolean logic to select between
 * non-skip and skip values.
 *
 * The mask is set to include all bits when not-skipping, but is
 * zero when skipping.
 */

uint16_t skip_delay_mask;       /* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
        ((non_skip_value) & skip_delay_mask)
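
/*
 * A minimal worked example of the macro above (values are illustrative,
 * not from the generated headers): with skip_delay_mask = 0xffff,
 * SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x6a) evaluates to 0x6a and the delay
 * loop runs at full length; with skip_delay_mask = 0 it evaluates to 0,
 * collapsing every such delay loop to its shortest path.
 */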

struct gbl_type *gbl;
struct param_type *param;
uint32_t curr_shadow_reg;

static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
        uint32_t write_group, uint32_t use_dm,
        uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);

static void set_failing_group_stage(uint32_t group, uint32_t stage,
        uint32_t substage)
{
        /*
         * Only set the global stage if there has not been any other
         * failing group.
         */
        if (gbl->error_stage == CAL_STAGE_NIL) {
                gbl->error_substage = substage;
                gbl->error_stage = stage;
                gbl->error_group = group;
        }
}

static void reg_file_set_group(u16 set_group)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
        set_sub_stage &= 0xff;
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

static void initialize(void)
{
        debug("%s:%d\n", __func__, __LINE__);
        /* USER calibration has control over path to memory */
        /*
         * In Hard PHY this is a 2-bit control:
         * 0: AFI Mux Select
         * 1: DDIO Mux Select
         */
        writel(0x3, &phy_mgr_cfg->mux_sel);

        /* USER memory clock is not stable, so we begin initialization */
        writel(0, &phy_mgr_cfg->reset_mem_stbl);

        /* USER calibration status all set to zero */
        writel(0, &phy_mgr_cfg->cal_status);

        writel(0, &phy_mgr_cfg->cal_debug_info);

        if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) {
                param->read_correct_mask_vg  = ((uint32_t)1 <<
                        (RW_MGR_MEM_DQ_PER_READ_DQS /
                        RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
                param->write_correct_mask_vg = ((uint32_t)1 <<
                        (RW_MGR_MEM_DQ_PER_READ_DQS /
                        RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
                param->read_correct_mask     = ((uint32_t)1 <<
                        RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
                param->write_correct_mask    = ((uint32_t)1 <<
                        RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
                param->dm_correct_mask       = ((uint32_t)1 <<
                        (RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH))
                        - 1;
        }
}

static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode)
{
        uint32_t odt_mask_0 = 0;
        uint32_t odt_mask_1 = 0;
        uint32_t cs_and_odt_mask;

        if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) {
                if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) {
                        /*
                         * 1 Rank
                         * Read: ODT = 0
                         * Write: ODT = 1
                         */
                        odt_mask_0 = 0x0;
                        odt_mask_1 = 0x1;
                } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) {
                        /* 2 Ranks */
                        if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
                                /*
                                 * - Dual-Slot, Single-Rank
                                 * (1 chip-select per DIMM)
                                 * OR
                                 * - RDIMM, 4 total CS (2 CS per DIMM)
                                 * means 2 DIMMs
                                 * Since MEM_NUMBER_OF_RANKS is 2, they are
                                 * both single rank with 2 CS each
                                 * (special for RDIMM).
                                 * Read: Turn on ODT on the opposite rank
                                 * Write: Turn on ODT on all ranks
                                 */
                                odt_mask_0 = 0x3 & ~(1 << rank);
                                odt_mask_1 = 0x3;
                        } else {
                                /*
                                 * USER - Single-Slot, Dual-Rank DIMMs
                                 * (2 chip-selects per DIMM)
                                 * USER Read: Turn off ODT on all ranks
                                 * USER Write: Turn on ODT on active rank
                                 */
                                odt_mask_0 = 0x0;
                                odt_mask_1 = 0x3 & (1 << rank);
                        }
                } else {
                        /*
                         * 4 Ranks
                         * Read:
                         * ----------+-----------------------+
                         *           |                       |
                         *           |         ODT           |
                         * Read From +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  0  |
                         *     1     |  1  |  0  |  0  |  0  |
                         *     2     |  0  |  0  |  0  |  1  |
                         *     3     |  0  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *
                         * Write:
                         * ----------+-----------------------+
                         *           |                       |
                         *           |         ODT           |
                         * Write To  +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  1  |
                         *     1     |  1  |  0  |  1  |  0  |
                         *     2     |  0  |  1  |  0  |  1  |
                         *     3     |  1  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         */
                        switch (rank) {
                        case 0:
                                odt_mask_0 = 0x4;
                                odt_mask_1 = 0x5;
                                break;
                        case 1:
                                odt_mask_0 = 0x8;
                                odt_mask_1 = 0xA;
                                break;
                        case 2:
                                odt_mask_0 = 0x1;
                                odt_mask_1 = 0x5;
                                break;
                        case 3:
                                odt_mask_0 = 0x2;
                                odt_mask_1 = 0xA;
                                break;
                        }
                }
        } else {
                odt_mask_0 = 0x0;
                odt_mask_1 = 0x0;
        }

        cs_and_odt_mask =
                (0xFF & ~(1 << rank)) |
                ((0xFF & odt_mask_0) << 8) |
                ((0xFF & odt_mask_1) << 16);
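
        /*
         * Illustrative check of the packing above (rank 1 of 4, read/write
         * mode): CS field = 0xFF & ~(1 << 1) = 0xFD, odt_mask_0 = 0x8,
         * odt_mask_1 = 0xA, so cs_and_odt_mask = 0x000A08FD.
         */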
        writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}

/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
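 *
 * The target address is composed as base | off | (grp << 2). For example
 * (illustrative): scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, 3, 2) writes 2
 * to SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_EN_PHASE_OFFSET | 0xc.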
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
        writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
        /*
         * Clear register file for HPS. 16 (2^4) is the size of the
         * full register file in the scc mgr:
         *      RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
         *                             MEM_IF_READ_DQS_WIDTH - 1);
         */
        int i;

        for (i = 0; i < 16; i++) {
                debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
                           __func__, __LINE__, i);
                scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
        }
}

static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase)
{
        scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
{
        scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(uint32_t write_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
}

static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(uint32_t write_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
}

static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
                    RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm,
                    delay);
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(uint32_t dqs)
{
        writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
        writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(uint32_t dq_in_group)
{
        writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(uint32_t dm)
{
        writel(dm, &sdr_scc_mgr->dm_ena);
}

static void scc_mgr_set_dqs_en_phase_all_ranks(uint32_t read_group,
                                               uint32_t phase)
{
        uint32_t r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_set_dqs_en_phase(read_group, phase);

                /*
                 * USER although the h/w doesn't support different phases per
                 * shadow register, for simplicity our scc manager modeling
                 * keeps different phase settings per shadow reg, and it's
                 * important for us to keep them in sync to match h/w.
                 * For efficiency, the scan chain update should occur only
                 * once to sr0.
                 */
                if (r == 0) {
                        writel(read_group, &sdr_scc_mgr->dqs_ena);
                        writel(0, &sdr_scc_mgr->update);
                }
        }
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
                                                     uint32_t phase)
{
        uint32_t r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_set_dqdqs_output_phase(write_group, phase);

                /*
                 * USER although the h/w doesn't support different phases per
                 * shadow register, for simplicity our scc manager modeling
                 * keeps different phase settings per shadow reg, and it's
                 * important for us to keep them in sync to match h/w.
                 * For efficiency, the scan chain update should occur only
                 * once to sr0.
                 */
                if (r == 0) {
                        writel(write_group, &sdr_scc_mgr->dqs_ena);
                        writel(0, &sdr_scc_mgr->update);
                }
        }
}

static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
                                               uint32_t delay)
{
        uint32_t r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_set_dqs_en_delay(read_group, delay);

                writel(read_group, &sdr_scc_mgr->dqs_ena);
                writel(0, &sdr_scc_mgr->update);
        }
        /*
         * In shadow register mode, the T11 settings are stored in
         * registers in the core, which are updated by the DQS_ENA
         * signals. Not issuing the SCC_MGR_UPD command allows us to
         * save lots of rank switching overhead, by calling
         * select_shadow_regs_for_update with update_scan_chains
         * set to 0.
         */
        writel(0, &sdr_scc_mgr->update);
}

static void scc_mgr_set_oct_out1_delay(uint32_t write_group, uint32_t delay)
{
        uint32_t read_group;
        uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_OCT_OUT1_DELAY_OFFSET;

        /*
         * Load the setting in the SCC manager.
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
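        /*
         * Illustrative example (widths depend on the generated
         * sequencer_defines.h): with RW_MGR_MEM_IF_READ_DQS_WIDTH = 4 and
         * RW_MGR_MEM_IF_WRITE_DQS_WIDTH = 2, write group 1 covers read
         * groups 2 and 3, so the loop below writes the delay twice.
         */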
        for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH /
             RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
             read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH /
             RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group)
                writel(delay, addr + (read_group << 2));
}

static void scc_mgr_set_hhp_extras(void)
{
        /*
         * Load the fixed setting in the SCC manager:
         * bits: 0:0 = 1'b1     - dqs bypass
         * bits: 1:1 = 1'b1     - dq bypass
         * bits: 4:2 = 3'b001   - rfifo_mode
         * bits: 6:5 = 2'b01    - rfifo clock_select
         * bits: 7:7 = 1'b0     - separate gating from ungating setting
         * bits: 8:8 = 1'b0     - separate OE from Output delay setting
         */
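        /* Sanity check (illustrative): the expression below evaluates to 0x27. */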
        uint32_t value = (0<<8) | (0<<7) | (1<<5) | (1<<2) | (1<<1) | (1<<0);
        uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_HHP_GLOBALS_OFFSET;

        writel(value, addr + SCC_MGR_HHP_EXTRAS_OFFSET);
}

/*
 * USER Zero all DQS config
 * TODO: maybe rename to scc_mgr_zero_dqs_config (or something)
 */
static void scc_mgr_zero_all(void)
{
        uint32_t i, r;

        /*
         * USER Zero all DQS config settings, across all groups and all
         * shadow registers
         */
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r +=
             NUM_RANKS_PER_SHADOW_REG) {
                for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
                        /*
                         * The phases actually don't exist on a per-rank basis,
                         * but there's no harm updating them several times, so
                         * let's keep the code simple.
                         */
                        scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
                        scc_mgr_set_dqs_en_phase(i, 0);
                        scc_mgr_set_dqs_en_delay(i, 0);
                }

                for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
                        scc_mgr_set_dqdqs_output_phase(i, 0);
                        /* av/cv don't have out2 */
                        scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
                }
        }

        /* multicast to all DQS group enables */
        writel(0xff, &sdr_scc_mgr->dqs_ena);
        writel(0, &sdr_scc_mgr->update);
}

static void scc_set_bypass_mode(uint32_t write_group, uint32_t mode)
{
        /*
         * mode = 0 : Do NOT bypass - Half Rate Mode
         * mode = 1 : Bypass - Full Rate Mode
         */

        /* only need to set once for all groups, pins, dq, dqs, dm */
        if (write_group == 0) {
                debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", __func__,
                           __LINE__);
                scc_mgr_set_hhp_extras();
                debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
                           __func__, __LINE__);
        }
        /* multicast to all DQ enables */
        writel(0xff, &sdr_scc_mgr->dq_ena);
        writel(0xff, &sdr_scc_mgr->dm_ena);

        /* update current DQS IO enable */
        writel(0, &sdr_scc_mgr->dqs_io_ena);

        /* update the DQS logic */
        writel(write_group, &sdr_scc_mgr->dqs_ena);

        /* hit update */
        writel(0, &sdr_scc_mgr->update);
}

static void scc_mgr_load_dqs_for_write_group(uint32_t write_group)
{
        uint32_t read_group;
        uint32_t addr = (u32)&sdr_scc_mgr->dqs_ena;

        /*
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be scanned multiple times.
         */
        for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH /
             RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
             read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH /
             RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group)
                writel(read_group, addr);
}

static void scc_mgr_zero_group(uint32_t write_group, uint32_t test_begin,
                               int32_t out_only)
{
        uint32_t i, r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r +=
             NUM_RANKS_PER_SHADOW_REG) {
                /* Zero all DQ config settings */
                for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
                        scc_mgr_set_dq_out1_delay(i, 0);
                        if (!out_only)
                                scc_mgr_set_dq_in_delay(i, 0);
                }

                /* multicast to all DQ enables */
                writel(0xff, &sdr_scc_mgr->dq_ena);

                /* Zero all DM config settings */
                for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                        scc_mgr_set_dm_out1_delay(i, 0);

                /* multicast to all DM enables */
                writel(0xff, &sdr_scc_mgr->dm_ena);

                /* zero all DQS io settings */
                if (!out_only)
                        scc_mgr_set_dqs_io_in_delay(write_group, 0);
                /* av/cv don't have out2 */
                scc_mgr_set_dqs_out1_delay(write_group, IO_DQS_OUT_RESERVE);
                scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
                scc_mgr_load_dqs_for_write_group(write_group);

                /* multicast to all DQS IO enables (only 1) */
                writel(0, &sdr_scc_mgr->dqs_io_ena);

                /* hit update to zero everything */
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(uint32_t write_group,
                                            uint32_t group_bgn, uint32_t delay)
{
        uint32_t i, p;

        for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
                scc_mgr_set_dq_in_delay(p, delay);
                scc_mgr_load_dq(p);
        }
}

/* apply and load a particular output delay for the DQ pins in a group */
static void scc_mgr_apply_group_dq_out1_delay(uint32_t write_group,
                                              uint32_t group_bgn,
                                              uint32_t delay1)
{
        uint32_t i, p;

        for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
                scc_mgr_set_dq_out1_delay(i, delay1);
                scc_mgr_load_dq(i);
        }
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(uint32_t write_group,
                                              uint32_t delay1)
{
        uint32_t i;

        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                scc_mgr_set_dm_out1_delay(i, delay1);
                scc_mgr_load_dm(i);
        }
}

/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
                                                    uint32_t delay)
{
        scc_mgr_set_dqs_out1_delay(write_group, delay);
        scc_mgr_load_dqs_io();

        scc_mgr_set_oct_out1_delay(write_group, delay);
        scc_mgr_load_dqs_for_write_group(write_group);
}

/* apply a delay to the entire output side: DQ, DM, DQS, OCT */
static void scc_mgr_apply_group_all_out_delay_add(uint32_t write_group,
                                                  uint32_t group_bgn,
                                                  uint32_t delay)
{
        uint32_t i, p, new_delay;

        /* dq shift */
        for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
                new_delay = READ_SCC_DQ_OUT2_DELAY;
                new_delay += delay;

                if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                        debug_cond(DLEVEL == 1,
                                   "%s:%d (%u, %u, %u) DQ[%u,%u]: %u > %lu => %lu\n",
                                   __func__, __LINE__,
                                   write_group, group_bgn, delay, i, p, new_delay,
                                   (long unsigned int)IO_IO_OUT2_DELAY_MAX,
                                   (long unsigned int)IO_IO_OUT2_DELAY_MAX);
                        new_delay = IO_IO_OUT2_DELAY_MAX;
                }

                scc_mgr_load_dq(i);
        }

        /* dm shift */
        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                new_delay = READ_SCC_DM_IO_OUT2_DELAY;
                new_delay += delay;

                if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                        debug_cond(DLEVEL == 1,
                                   "%s:%d (%u, %u, %u) DM[%u]: %u > %lu => %lu\n",
                                   __func__, __LINE__,
                                   write_group, group_bgn, delay, i, new_delay,
                                   (long unsigned int)IO_IO_OUT2_DELAY_MAX,
                                   (long unsigned int)IO_IO_OUT2_DELAY_MAX);
                        new_delay = IO_IO_OUT2_DELAY_MAX;
                }

                scc_mgr_load_dm(i);
        }

        /* dqs shift */
        new_delay = READ_SCC_DQS_IO_OUT2_DELAY;
        new_delay += delay;

        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                debug_cond(DLEVEL == 1,
                           "%s:%d (%u, %u, %u) DQS: %u > %d => %d; adding %u to OUT1\n",
                           __func__, __LINE__,
                           write_group, group_bgn, delay, new_delay,
                           IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
                scc_mgr_set_dqs_out1_delay(write_group, new_delay -
                                           IO_IO_OUT2_DELAY_MAX);
                new_delay = IO_IO_OUT2_DELAY_MAX;
        }

        scc_mgr_load_dqs_io();

        /* oct shift */
        new_delay = READ_SCC_OCT_OUT2_DELAY;
        new_delay += delay;

        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                debug_cond(DLEVEL == 1,
                           "%s:%d (%u, %u, %u) OCT: %u > %d => %d; adding %u to OUT1\n",
                           __func__, __LINE__,
                           write_group, group_bgn, delay, new_delay,
                           IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
                scc_mgr_set_oct_out1_delay(write_group, new_delay -
                                           IO_IO_OUT2_DELAY_MAX);
                new_delay = IO_IO_OUT2_DELAY_MAX;
        }

        scc_mgr_load_dqs_for_write_group(write_group);
}

/*
 * USER apply a delay to the entire output side (DQ, DM, DQS, OCT)
 * and to all ranks
 */
static void scc_mgr_apply_group_all_out_delay_add_all_ranks(
        uint32_t write_group, uint32_t group_bgn, uint32_t delay)
{
        uint32_t r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_apply_group_all_out_delay_add(write_group,
                                                      group_bgn, delay);
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * optimization used to recover some slots in ddr3 inst_rom;
 * could be applied to other protocols if we wanted to
 */
static void set_jump_as_return(void)
{
        /*
         * To save space, we replace return with a jump to a special shared
         * RETURN instruction, and set the counter to a large value so that
         * we always take the jump.
         */
        writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
        writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/*
 * should always use constants as argument to ensure all computations are
 * performed at compile time
 */
static void delay_for_n_mem_clocks(const uint32_t clocks)
{
        uint32_t afi_clocks;
        uint8_t inner = 0;
        uint8_t outer = 0;
        uint16_t c_loop = 0;

        debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

        /* scale (rounding up) to get afi clocks */
        afi_clocks = (clocks + AFI_RATE_RATIO - 1) / AFI_RATE_RATIO;

        /*
         * Note, we don't bother accounting for being off a little bit
         * because of a few extra instructions in outer loops.
         * Note, the loops have a test at the end, and do the test before
         * the decrement, and so always perform the loop
         * 1 time more than the counter value.
         */
        if (afi_clocks == 0) {
                ;
        } else if (afi_clocks <= 0x100) {
                inner = afi_clocks - 1;
                outer = 0;
                c_loop = 0;
        } else if (afi_clocks <= 0x10000) {
                inner = 0xff;
                outer = (afi_clocks - 1) >> 8;
                c_loop = 0;
        } else {
                inner = 0xff;
                outer = 0xff;
                c_loop = (afi_clocks - 1) >> 16;
        }
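
        /*
         * Worked example (illustrative, assuming AFI_RATE_RATIO = 2):
         * clocks = 512 scales to afi_clocks = 256 = 0x100, which falls in
         * the first case above, giving inner = 0xff, outer = 0, c_loop = 0,
         * i.e. a single 256-iteration inner loop and no outer loop.
         */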

        /*
         * rom instructions are structured as follows:
         *
         *    IDLE_LOOP2: jnz cntr0, TARGET_A
         *    IDLE_LOOP1: jnz cntr1, TARGET_B
         *                return
         *
         * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
         * TARGET_B is set to IDLE_LOOP2 as well
         *
         * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
         * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
         *
         * a little confusing, but it helps save precious space in the inst_rom
         * and sequencer rom and keeps the delays more accurate and reduces
         * overhead
         */
        if (afi_clocks <= 0x100) {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                        &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_IDLE_LOOP1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                          RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        } else {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                        &sdr_rw_load_mgr_regs->load_cntr0);

                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
                        &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_IDLE_LOOP2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(RW_MGR_IDLE_LOOP2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                /* hack to get around compiler not being smart enough */
                if (afi_clocks <= 0x10000) {
                        /* only need to run once */
                        writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                                  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                } else {
                        do {
                                writel(RW_MGR_IDLE_LOOP2,
                                        SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                        RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                        } while (c_loop-- != 0);
                }
        }
        debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}

static void rw_mgr_mem_initialize(void)
{
        uint32_t r;
        uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                           RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        debug("%s:%d\n", __func__, __LINE__);

        /* The reset / cke part of initialization is broadcast to all ranks */
        writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);

        /*
         * Here's how you load registers for a loop:
         * Counters are located @ 0x800
         * Jump addresses are located @ 0xC00
         * For both, registers 0 to 3 are selected using bits 3 and 2, like
         * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
         * I know this ain't pretty, but Avalon bus throws away the 2 least
         * significant bits
         */

        /* start with memory RESET activated */

        /* tINIT = 200us */

        /*
         * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
         * If a and b are the number of iterations in 2 nested loops
         * it takes the following number of cycles to complete the operation:
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 0, a = 256, b = 106 => a = FF,
         * b = 6A
         */
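
        /*
         * Plugging those values back in (a quick illustrative check):
         * ((2 + 0) * 256 + 2) * 106 = 54484 cycles, comfortably above the
         * ~54000 cycles needed for 200us.
         */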

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump addresses */
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        /* Execute count instruction */
        writel(RW_MGR_INIT_RESET_0_CKE_0, grpaddr);

        /* indicate that memory is stable */
        writel(1, &phy_mgr_cfg->reset_mem_stbl);

        /*
         * transition the RESET to high
         * Wait for 500us
         */

        /*
         * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
         * If a and b are the number of iterations in 2 nested loops
         * it takes the following number of cycles to complete the operation
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 2, a = 131, b = 256 => a = 83,
         * b = FF
         */
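
        /*
         * Again as an illustrative check: ((2 + 2) * 131 + 2) * 256 =
         * 134656 cycles, just over the ~134000 cycles needed for 500us.
         */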

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump addresses */
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        writel(RW_MGR_INIT_RESET_1_CKE_0, grpaddr);

        /* bring up clock enable */

        /* tXRP < 250 ck cycles */
        delay_for_n_mem_clocks(250);

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
                if (param->skip_ranks[r]) {
                        /* request to skip the rank */
                        continue;
                }

                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

                /*
                 * USER Use mirrored commands for odd ranks if address
                 * mirroring is on
                 */
                if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_DLL_RESET_MIRR, grpaddr);
                } else {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1, grpaddr);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_DLL_RESET, grpaddr);
                }
                set_jump_as_return();
                writel(RW_MGR_ZQCL, grpaddr);

                /* tZQinit = tDLLK = 512 ck cycles */
                delay_for_n_mem_clocks(512);
        }
}

/*
 * At the end of calibration we have to program the user settings in
 * and hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(void)
{
        uint32_t r;
        uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                           RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        debug("%s:%d\n", __func__, __LINE__);
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
                if (param->skip_ranks[r])
                        /* request to skip the rank */
                        continue;
                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

                /* precharge all banks ... */
                writel(RW_MGR_PRECHARGE_ALL, grpaddr);

                /* load up MR settings specified by user */

                /*
                 * Use mirrored commands for odd ranks if address
                 * mirroring is on
                 */
                if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_USER_MIRR, grpaddr);
                } else {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_USER, grpaddr);
                }
                /*
                 * USER need to wait tMOD (12CK or 15ns) time before issuing
                 * other commands, but we will have plenty of NIOS cycles
                 * before actual handoff, so it's okay.
                 */
        }
}

/*
 * performs a guaranteed read on the patterns we are going to use during a
 * read test to ensure memory works
 */
static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn,
        uint32_t group, uint32_t num_tries, uint32_t *bit_chk,
        uint32_t all_ranks)
{
        uint32_t r, vg;
        uint32_t correct_mask_vg;
        uint32_t tmp_bit_chk;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        uint32_t addr;
        uint32_t base_rw_mgr;

        *bit_chk = param->read_correct_mask;
        correct_mask_vg = param->read_correct_mask_vg;

        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* request to skip the rank */
                        continue;

                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up a constant burst of read commands */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
                writel(RW_MGR_GUARANTEED_READ,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
                writel(RW_MGR_GUARANTEED_READ_CONT,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                tmp_bit_chk = 0;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
                        /* reset the fifos to get pointers to known state */
                        writel(0, &phy_mgr_cmd->fifo_reset);
                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);
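
                        /*
                         * Accumulate per-virtual-group results, highest vg
                         * first. Illustrative widths: with 8 DQ per read DQS
                         * and 2 virtual groups, each pass shifts the
                         * accumulator left by 4 and ORs in the 4 pass flags
                         * of the current virtual group.
                         */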
                        tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
                                / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

                        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                               RW_MGR_RUN_SINGLE_GROUP_OFFSET;
                        writel(RW_MGR_GUARANTEED_READ, addr +
                               ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
                                vg) << 2));

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~base_rw_mgr);

                        if (vg == 0)
                                break;
                }
                *bit_chk &= tmp_bit_chk;
        }

        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
        debug_cond(DLEVEL == 1,
                   "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %lu\n",
                   __func__, __LINE__, group, *bit_chk,
                   param->read_correct_mask,
                   (long unsigned int)(*bit_chk == param->read_correct_mask));
        return *bit_chk == param->read_correct_mask;
}

static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks
        (uint32_t group, uint32_t num_tries, uint32_t *bit_chk)
{
        return rw_mgr_mem_calibrate_read_test_patterns(0, group,
                num_tries, bit_chk, 1);
}

/* load up the patterns we are going to use during a read test */
static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn,
        uint32_t all_ranks)
{
        uint32_t r;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);

        debug("%s:%d\n", __func__, __LINE__);
        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* request to skip the rank */
                        continue;

                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up a constant burst */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT0,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT3,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);

                writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                                RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        }

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
}

/*
 * Try a read and see if it returns correct data back. Has dummy reads
 * inserted into the mix, used to align DQS enable. Has more thorough
 * checks than the regular read test.
 */
static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group,
        uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
        uint32_t all_groups, uint32_t all_ranks)
{
        uint32_t r, vg;
        uint32_t correct_mask_vg;
        uint32_t tmp_bit_chk;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        uint32_t addr;
        uint32_t base_rw_mgr;
        uint32_t quick_read_mode;

        *bit_chk = param->read_correct_mask;
        correct_mask_vg = param->read_correct_mask_vg;

        quick_read_mode = (((STATIC_CALIB_STEPS) &
                CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION);

        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* request to skip the rank */
                        continue;

                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_READ_B2B_WAIT1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
                writel(RW_MGR_READ_B2B_WAIT2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                if (quick_read_mode)
                        writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
                        /* need at least two (1+1) reads to capture failures */
                else if (all_groups)
                        writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
                else
                        writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);

                writel(RW_MGR_READ_B2B,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
                if (all_groups)
                        writel(RW_MGR_MEM_IF_READ_DQS_WIDTH *
                               RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1,
                               &sdr_rw_load_mgr_regs->load_cntr3);
                else
                        writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);

                writel(RW_MGR_READ_B2B,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);

                tmp_bit_chk = 0;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
                        /* reset the fifos to get pointers to known state */
                        writel(0, &phy_mgr_cmd->fifo_reset);
                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);

                        tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
                                / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

                        if (all_groups)
                                addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                       RW_MGR_RUN_ALL_GROUPS_OFFSET;
                        else
                                addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                       RW_MGR_RUN_SINGLE_GROUP_OFFSET;

                        writel(RW_MGR_READ_B2B, addr +
                               ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
                               vg) << 2));

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~base_rw_mgr);

                        if (vg == 0)
                                break;
                }
                *bit_chk &= tmp_bit_chk;
        }

        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

        if (all_correct) {
                set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %lu\n",
                           __func__, __LINE__, group,
                           all_groups, *bit_chk, param->read_correct_mask,
                           (long unsigned int)(*bit_chk ==
                           param->read_correct_mask));
                return *bit_chk == param->read_correct_mask;
        } else {
                set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "%s:%d read_test(%u,ONE,%u) => (%u != %lu) => %lu\n",
                           __func__, __LINE__,
                           group, all_groups, *bit_chk, (long unsigned int)0,
                           (long unsigned int)(*bit_chk != 0x00));
                return *bit_chk != 0x00;
        }
}

static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group,
        uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
        uint32_t all_groups)
{
        return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct,
                                              bit_chk, all_groups, 1);
}

static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v)
{
        writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
        (*v)++;
}

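/*
 * The VFIFO pointer can only be advanced, so a "decrement" is emulated
 * by wrapping all the way around: assuming the pointer is modulo
 * VFIFO_SIZE, issuing VFIFO_SIZE - 1 increments lands one slot behind
 * the starting position.
 */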
static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v)
{
        uint32_t i;

        for (i = 0; i < VFIFO_SIZE - 1; i++)
                rw_mgr_incr_vfifo(grp, v);
}

1312 static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk)
1313 {
1314         uint32_t  v;
1315         uint32_t fail_cnt = 0;
1316         uint32_t test_status;
1317
1318         for (v = 0; v < VFIFO_SIZE; ) {
1319                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n",
1320                            __func__, __LINE__, v);
1321                 test_status = rw_mgr_mem_calibrate_read_test_all_ranks
1322                         (grp, 1, PASS_ONE_BIT, bit_chk, 0);
1323                 if (!test_status) {
1324                         fail_cnt++;
1325
1326                         if (fail_cnt == 2)
1327                                 break;
1328                 }
1329
1330                 /* fiddle with FIFO */
1331                 rw_mgr_incr_vfifo(grp, &v);
1332         }
1333
1334         if (v >= VFIFO_SIZE) {
1335                 /* no failing read found!! Something must have gone wrong */
1336                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n",
1337                            __func__, __LINE__);
1338                 return 0;
1339         } else {
1340                 return v;
1341         }
1342 }
1343
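     /*
      * Sweep the DQS-enable dtap (d), VFIFO position (i) and ptap (p) until a
      * read first passes; at that point the start of the working window is
      * work_bgn = d * IO_DELAY_PER_DQS_EN_DCHAIN_TAP + p * IO_DELAY_PER_OPA_TAP.
      */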
1344 static int find_working_phase(uint32_t *grp, uint32_t *bit_chk,
1345                               uint32_t dtaps_per_ptap, uint32_t *work_bgn,
1346                               uint32_t *v, uint32_t *d, uint32_t *p,
1347                               uint32_t *i, uint32_t *max_working_cnt)
1348 {
1349         uint32_t found_begin = 0;
1350         uint32_t tmp_delay = 0;
1351         uint32_t test_status;
1352
1353         for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay +=
1354                 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1355                 *work_bgn = tmp_delay;
1356                 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1357
1358                 for (*i = 0; *i < VFIFO_SIZE; (*i)++) {
1359                         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn +=
1360                                 IO_DELAY_PER_OPA_TAP) {
1361                                 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1362
1363                                 test_status =
1364                                 rw_mgr_mem_calibrate_read_test_all_ranks
1365                                 (*grp, 1, PASS_ONE_BIT, bit_chk, 0);
1366
1367                                 if (test_status) {
1368                                         *max_working_cnt = 1;
1369                                         found_begin = 1;
1370                                         break;
1371                                 }
1372                         }
1373
1374                         if (found_begin)
1375                                 break;
1376
1377                         if (*p > IO_DQS_EN_PHASE_MAX)
1378                                 /* fiddle with FIFO */
1379                                 rw_mgr_incr_vfifo(*grp, v);
1380                 }
1381
1382                 if (found_begin)
1383                         break;
1384         }
1385
1386         if (*i >= VFIFO_SIZE) {
1387                 /* cannot find working solution */
1388                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\
1389                            ptap/dtap\n", __func__, __LINE__);
1390                 return 0;
1391         } else {
1392                 return 1;
1393         }
1394 }
1395
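     /*
      * The ptap search above can overshoot the window start by up to one
      * phase tap. Back up one ptap (wrapping the VFIFO if the phase
      * underflows) and re-approach the start in finer dtap steps to refine
      * work_bgn.
      */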
1396 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk,
1397                              uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1398                              uint32_t *p, uint32_t *max_working_cnt)
1399 {
1400         uint32_t found_begin = 0;
1401         uint32_t tmp_delay;
1402
1403         /* Special case code for backing up a phase */
1404         if (*p == 0) {
1405                 *p = IO_DQS_EN_PHASE_MAX;
1406                 rw_mgr_decr_vfifo(*grp, v);
1407         } else {
1408                 (*p)--;
1409         }
1410         tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP;
1411         scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1412
1413         for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn;
1414                 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1415                 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1416
1417                 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1418                                                              PASS_ONE_BIT,
1419                                                              bit_chk, 0)) {
1420                         found_begin = 1;
1421                         *work_bgn = tmp_delay;
1422                         break;
1423                 }
1424         }
1425
1426         /* We have found a working dtap before the ptap found above */
1427         if (found_begin == 1)
1428                 (*max_working_cnt)++;
1429
1430         /*
1431          * Restore VFIFO to old state before we decremented it
1432          * (if needed).
1433          */
1434         (*p)++;
1435         if (*p > IO_DQS_EN_PHASE_MAX) {
1436                 *p = 0;
1437                 rw_mgr_incr_vfifo(*grp, v);
1438         }
1439
1440         scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0);
1441 }
1442
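     /*
      * Starting from the working phase found above, keep stepping the ptap
      * (wrapping the VFIFO as needed) until a read first fails; that point
      * marks work_end, the right edge of the DQS-enable window.
      */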
1443 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk,
1444                              uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1445                              uint32_t *p, uint32_t *i, uint32_t *max_working_cnt,
1446                              uint32_t *work_end)
1447 {
1448         uint32_t found_end = 0;
1449
1450         (*p)++;
1451         *work_end += IO_DELAY_PER_OPA_TAP;
1452         if (*p > IO_DQS_EN_PHASE_MAX) {
1453                 /* fiddle with FIFO */
1454                 *p = 0;
1455                 rw_mgr_incr_vfifo(*grp, v);
1456         }
1457
1458         for (; *i < VFIFO_SIZE + 1; (*i)++) {
1459                 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end
1460                         += IO_DELAY_PER_OPA_TAP) {
1461                         scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1462
1463                         if (!rw_mgr_mem_calibrate_read_test_all_ranks
1464                                 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) {
1465                                 found_end = 1;
1466                                 break;
1467                         } else {
1468                                 (*max_working_cnt)++;
1469                         }
1470                 }
1471
1472                 if (found_end)
1473                         break;
1474
1475                 if (*p > IO_DQS_EN_PHASE_MAX) {
1476                         /* fiddle with FIFO */
1477                         rw_mgr_incr_vfifo(*grp, v);
1478                         *p = 0;
1479                 }
1480         }
1481
1482         if (*i >= VFIFO_SIZE + 1) {
1483                 /* cannot see edge of failing read */
1484                 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\
1485                            failed\n", __func__, __LINE__);
1486                 return 0;
1487         } else {
1488                 return 1;
1489         }
1490 }
1491
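     /*
      * Centre the DQS-enable window: work_mid = (work_bgn + work_end) / 2 is
      * first reduced modulo one VFIFO cycle (IO_DQS_EN_PHASE_MAX + 1 ptaps),
      * then decomposed into whole ptaps plus dtaps. Worked example with
      * illustrative values only: if one ptap were 2500 ps, one dtap 100 ps
      * and work_mid 6200 ps, the loops below would settle on p - 1 = 2 ptaps
      * plus d = 12 dtaps, since 2 * 2500 + 12 * 100 = 6200.
      */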
1492 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk,
1493                                   uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1494                                   uint32_t *p, uint32_t *work_mid,
1495                                   uint32_t *work_end)
1496 {
1497         int i;
1498         int tmp_delay = 0;
1499
1500         *work_mid = (*work_bgn + *work_end) / 2;
1501
1502         debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1503                    *work_bgn, *work_end, *work_mid);
1504         /* Get the middle delay to be less than a VFIFO delay */
1505         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX;
1506                 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1507                 ;
1508         debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
1509         while (*work_mid > tmp_delay)
1510                 *work_mid -= tmp_delay;
1511         debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid);
1512
1513         tmp_delay = 0;
1514         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid;
1515                 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1516                 ;
1517         tmp_delay -= IO_DELAY_PER_OPA_TAP;
1518         debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay);
1519         for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++,
1520                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP)
1521                 ;
1522         debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay);
1523
1524         scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1);
1525         scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1526
1527         /*
1528          * push vfifo until we can successfully calibrate. We can do this
1529          * because the largest possible margin is 1 VFIFO cycle.
1530          */
1531         for (i = 0; i < VFIFO_SIZE; i++) {
1532                 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n",
1533                            *v);
1534                 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1535                                                              PASS_ONE_BIT,
1536                                                              bit_chk, 0)) {
1537                         break;
1538                 }
1539
1540                 /* fiddle with FIFO */
1541                 rw_mgr_incr_vfifo(*grp, v);
1542         }
1543
1544         if (i >= VFIFO_SIZE) {
1545                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \
1546                            failed\n", __func__, __LINE__);
1547                 return 0;
1548         } else {
1549                 return 1;
1550         }
1551 }
1552
1553 /* find a good dqs enable to use */
1554 static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp)
1555 {
1556         uint32_t v, d, p, i;
1557         uint32_t max_working_cnt;
1558         uint32_t bit_chk;
1559         uint32_t dtaps_per_ptap;
1560         uint32_t work_bgn, work_mid, work_end;
1561         uint32_t found_passing_read, found_failing_read, initial_failing_dtap;
1562
1563         debug("%s:%d %u\n", __func__, __LINE__, grp);
1564
1565         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1566
1567         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1568         scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1569
1570         /* ************************************************************** */
1571         /* * Step 0 : Determine number of delay taps for each phase tap * */
1572         dtaps_per_ptap = IO_DELAY_PER_OPA_TAP / IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1573
1574         /* ********************************************************* */
1575         /* * Step 1 : First push vfifo until we get a failing read * */
1576         v = find_vfifo_read(grp, &bit_chk);
1577
1578         max_working_cnt = 0;
1579
1580         /* ******************************************************** */
1581         /* * step 2: find first working phase, increment in ptaps * */
1582         work_bgn = 0;
1583         if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d,
1584                                 &p, &i, &max_working_cnt) == 0)
1585                 return 0;
1586
1587         work_end = work_bgn;
1588
1589         /*
1590          * If d is 0 then the working window covers a phase tap and
1591          * we can follow the old procedure; otherwise, we've found the beginning,
1592          * and we need to increment the dtaps until we find the end.
1593          */
1594         if (d == 0) {
1595                 /* ********************************************************* */
1596                 /* * step 3a: if we have room, back off by one and
1597                 increment in dtaps * */
1598
1599                 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1600                                  &max_working_cnt);
1601
1602                 /* ********************************************************* */
1603                 /* * step 4a: go forward from working phase to non working
1604                 phase, increment in ptaps * */
1605                 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1606                                          &i, &max_working_cnt, &work_end) == 0)
1607                         return 0;
1608
1609                 /* ********************************************************* */
1610                 /* * step 5a:  back off one from last, increment in dtaps  * */
1611
1612                 /* Special case code for backing up a phase */
1613                 if (p == 0) {
1614                         p = IO_DQS_EN_PHASE_MAX;
1615                         rw_mgr_decr_vfifo(grp, &v);
1616                 } else {
1617                         p = p - 1;
1618                 }
1619
1620                 work_end -= IO_DELAY_PER_OPA_TAP;
1621                 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1622
1623                 /* * The actual increment of dtaps is done outside of
1624                 the if/else loop to share code */
1625                 d = 0;
1626
1627                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \
1628                            vfifo=%u ptap=%u\n", __func__, __LINE__,
1629                            v, p);
1630         } else {
1631                 /* ******************************************************* */
1632                 /* * step 3-5b:  Find the right edge of the window using
1633                 delay taps   * */
1634                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \
1635                            ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__,
1636                            v, p, d, work_bgn);
1637
1638                 work_end = work_bgn;
1639
1640                 /* * The actual increment of dtaps is done outside of the
1641                 if/else loop to share code */
1642
1643                 /* Only here to counterbalance a subtract later on which is
1644                 not needed if this branch of the algorithm is taken */
1645                 max_working_cnt++;
1646         }
1647
1648         /* The dtap increment to find the failing edge is done here */
1649         for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end +=
1650                 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1651                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \
1652                            end-2: dtap=%u\n", __func__, __LINE__, d);
1653                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1654
1655                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1656                                                               PASS_ONE_BIT,
1657                                                               &bit_chk, 0)) {
1658                         break;
1659                 }
1660         }
1661
1662         /* Go back to working dtap */
1663         if (d != 0)
1664                 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1665
1666         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \
1667                    ptap=%u dtap=%u end=%u\n", __func__, __LINE__,
1668                    v, p, d-1, work_end);
1669
1670         if (work_end < work_bgn) {
1671                 /* nil range */
1672                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \
1673                            failed\n", __func__, __LINE__);
1674                 return 0;
1675         }
1676
1677         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n",
1678                    __func__, __LINE__, work_bgn, work_end);
1679
1680         /* *************************************************************** */
1681         /*
1682          * * We need to calculate the number of dtaps that equal a ptap
1683          * * To do that we'll back up a ptap and re-find the edge of the
1684          * * window using dtaps
1685          */
1686
1687         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \
1688                    for tracking\n", __func__, __LINE__);
1689
1690         /* Special case code for backing up a phase */
1691         if (p == 0) {
1692                 p = IO_DQS_EN_PHASE_MAX;
1693                 rw_mgr_decr_vfifo(grp, &v);
1694                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \
1695                            cycle/phase: v=%u p=%u\n", __func__, __LINE__,
1696                            v, p);
1697         } else {
1698                 p = p - 1;
1699                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \
1700                            phase only: v=%u p=%u\n", __func__, __LINE__,
1701                            v, p);
1702         }
1703
1704         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1705
1706         /*
1707          * Increase dtap until we first see a passing read (in case the
1708          * window is smaller than a ptap),
1709          * and then a failing read to mark the edge of the window again
1710          */
1711
1712         /* Find a passing read */
1713         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n",
1714                    __func__, __LINE__);
1715         found_passing_read = 0;
1716         found_failing_read = 0;
1717         initial_failing_dtap = d;
1718         for (; d <= IO_DQS_EN_DELAY_MAX; d++) {
1719                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \
1720                            read d=%u\n", __func__, __LINE__, d);
1721                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1722
1723                 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1724                                                              PASS_ONE_BIT,
1725                                                              &bit_chk, 0)) {
1726                         found_passing_read = 1;
1727                         break;
1728                 }
1729         }
1730
1731         if (found_passing_read) {
1732                 /* Find a failing read */
1733                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \
1734                            read\n", __func__, __LINE__);
1735                 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) {
1736                         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \
1737                                    testing read d=%u\n", __func__, __LINE__, d);
1738                         scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1739
1740                         if (!rw_mgr_mem_calibrate_read_test_all_ranks
1741                                 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) {
1742                                 found_failing_read = 1;
1743                                 break;
1744                         }
1745                 }
1746         } else {
1747                 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \
1748                            calculate dtaps", __func__, __LINE__);
1749                 debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n");
1750         }
1751
1752         /*
1753          * The dynamically calculated dtaps_per_ptap is only valid if we
1754          * found both a passing and a failing read. If we didn't, it means
1755          * d hit the max (IO_DQS_EN_DELAY_MAX) and dtaps_per_ptap retains
1756          * its statically calculated value.
1757          */
1758         if (found_passing_read && found_failing_read)
1759                 dtaps_per_ptap = d - initial_failing_dtap;
1760
1761         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1762         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \
1763                    - %u = %u\n", __func__, __LINE__, d,
1764                    initial_failing_dtap, dtaps_per_ptap);
1765
1766         /* ******************************************** */
1767         /* * step 6:  Find the centre of the window   * */
1768         if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1769                                    &work_mid, &work_end) == 0)
1770                 return 0;
1771
1772         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \
1773                    vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__,
1774                    v, p-1, d);
1775         return 1;
1776 }
1777
1778 /*
1779  * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
1780  * dq_in_delay values
1781  */
1782 static uint32_t
1783 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
1784 (uint32_t write_group, uint32_t read_group, uint32_t test_bgn)
1785 {
1786         uint32_t found;
1787         uint32_t i;
1788         uint32_t p;
1789         uint32_t d;
1790         uint32_t r;
1791
1792         const uint32_t delay_step = IO_IO_IN_DELAY_MAX /
1793                 (RW_MGR_MEM_DQ_PER_READ_DQS-1);
1794                 /* we start at zero, so have one less dq to divide among */
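             /*
              * e.g. if IO_IO_IN_DELAY_MAX were 31 with 8 DQ per read DQS
              * (illustrative values), delay_step = 31 / 7 = 4 and the loop
              * below would spread delays 0, 4, 8, ..., 28 across the group.
              */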
1795
1796         debug("%s:%d (%u,%u,%u)\n", __func__, __LINE__, write_group, read_group,
1797               test_bgn);
1798
1799         /* try different dq_in_delays since the dq path is shorter than dqs */
1800
1801         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1802              r += NUM_RANKS_PER_SHADOW_REG) {
1803                 for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS;
1804                         i++, p++, d += delay_step) {
1805                         debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\
1806                                    vfifo_find_dqs_", __func__, __LINE__);
1807                         debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ",
1808                                write_group, read_group);
1809                         debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i, p, d);
1810                         scc_mgr_set_dq_in_delay(p, d);
1811                         scc_mgr_load_dq(p);
1812                 }
1813                 writel(0, &sdr_scc_mgr->update);
1814         }
1815
1816         found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group);
1817
1818         debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\
1819                    en_phase_sweep_dq", __func__, __LINE__);
1820         debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Resetting delay \
1821                    chain to zero\n", write_group, read_group, found);
1822
1823         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1824              r += NUM_RANKS_PER_SHADOW_REG) {
1825                 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS;
1826                         i++, p++) {
1827                         scc_mgr_set_dq_in_delay(p, 0);
1828                         scc_mgr_load_dq(p);
1829                 }
1830                 writel(0, &sdr_scc_mgr->update);
1831         }
1832
1833         return found;
1834 }
1835
1836 /* per-bit deskew DQ and center */
1837 static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn,
1838         uint32_t write_group, uint32_t read_group, uint32_t test_bgn,
1839         uint32_t use_read_test, uint32_t update_fom)
1840 {
1841         uint32_t i, p, d, min_index;
1842         uint32_t bit_chk;
1843         uint32_t sticky_bit_chk;
1844         /*
1845          * Store these as signed since there are comparisons with
1846          * signed numbers.
1847          */
1848         int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1849         int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1850         int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS];
1851         int32_t mid;
1852         int32_t orig_mid_min, mid_min;
1853         int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs,
1854                 final_dqs_en;
1855         int32_t dq_margin, dqs_margin;
1856         uint32_t stop;
1857         uint32_t temp_dq_in_delay1;
1858         uint32_t addr;
1859
1860         debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn);
1861
1862         addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET;
1863         start_dqs = readl(addr + (read_group << 2));
1864         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
1865                 start_dqs_en = readl(addr + ((read_group << 2)
1866                                      - IO_DQS_EN_DELAY_OFFSET));
1867
1868         /* set the left and right edge of each bit to an illegal value */
1869         /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */
1870         sticky_bit_chk = 0;
1871         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1872                 left_edge[i]  = IO_IO_IN_DELAY_MAX + 1;
1873                 right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1874         }
1875
1876         /* Search for the left edge of the window for each bit */
1877         for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) {
1878                 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d);
1879
1880                 writel(0, &sdr_scc_mgr->update);
1881
1882                 /*
1883                  * Stop searching when the read test doesn't pass AND when
1884                  * we've seen a passing read on every bit.
1885                  */
1886                 if (use_read_test) {
1887                         stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1888                                 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1889                                 &bit_chk, 0, 0);
1890                 } else {
1891                         rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1892                                                         0, PASS_ONE_BIT,
1893                                                         &bit_chk, 0);
1894                         bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1895                                 (read_group - (write_group *
1896                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
1897                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1898                         stop = (bit_chk == 0);
1899                 }
1900                 sticky_bit_chk = sticky_bit_chk | bit_chk;
1901                 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1902                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \
1903                            && %u\n", __func__, __LINE__, d,
1904                            sticky_bit_chk,
1905                            param->read_correct_mask, stop);
1906
1907                 if (stop == 1) {
1908                         break;
1909                 } else {
1910                         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1911                                 if (bit_chk & 1) {
1912                                         /* Remember a passing test as the
1913                                         left_edge */
1914                                         left_edge[i] = d;
1915                                 } else {
1916                                         /* If a left edge has not been seen yet,
1917                                         then a future passing test will mark
1918                                         this edge as the right edge */
1919                                         if (left_edge[i] ==
1920                                                 IO_IO_IN_DELAY_MAX + 1) {
1921                                                 right_edge[i] = -(d + 1);
1922                                         }
1923                                 }
1924                                 bit_chk = bit_chk >> 1;
1925                         }
1926                 }
1927         }
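             /*
              * Edges recorded as -(d + 1) above encode failures seen before
              * any pass for that bit, which lets the two sweeps (DQ above,
              * DQS below) be combined on a single axis when the window
              * widths are computed.
              */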
1928
1929         /* Reset DQ delay chains to 0 */
1930         scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, 0);
1931         sticky_bit_chk = 0;
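             /*
              * i is unsigned, so the countdown loop below exits through the
              * explicit i == 0 break; an i >= 0 condition would always hold.
              */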
1932         for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) {
1933                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \
1934                            %d right_edge[%u]: %d\n", __func__, __LINE__,
1935                            i, left_edge[i], i, right_edge[i]);
1936
1937                 /*
1938                  * Check for cases where we haven't found the left edge,
1939                  * which makes our assignment of the right edge invalid.
1940                  * Reset it to the illegal value.
1941                  */
1942                 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && (
1943                         right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1944                         right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1945                         debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \
1946                                    right_edge[%u]: %d\n", __func__, __LINE__,
1947                                    i, right_edge[i]);
1948                 }
1949
1950                 /*
1951                  * Reset sticky bit (except for bits where we have seen
1952                  * both the left and right edge).
1953                  */
1954                 sticky_bit_chk = sticky_bit_chk << 1;
1955                 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) &&
1956                     (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1957                         sticky_bit_chk = sticky_bit_chk | 1;
1958                 }
1959
1960                 if (i == 0)
1961                         break;
1962         }
1963
1964         /* Search for the right edge of the window for each bit */
1965         for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) {
1966                 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
1967                 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
1968                         uint32_t delay = d + start_dqs_en;
1969                         if (delay > IO_DQS_EN_DELAY_MAX)
1970                                 delay = IO_DQS_EN_DELAY_MAX;
1971                         scc_mgr_set_dqs_en_delay(read_group, delay);
1972                 }
1973                 scc_mgr_load_dqs(read_group);
1974
1975                 writel(0, &sdr_scc_mgr->update);
1976
1977                 /*
1978                  * Stop searching when the read test doesn't pass AND when
1979                  * we've seen a passing read on every bit.
1980                  */
1981                 if (use_read_test) {
1982                         stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1983                                 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1984                                 &bit_chk, 0, 0);
1985                 } else {
1986                         rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1987                                                         0, PASS_ONE_BIT,
1988                                                         &bit_chk, 0);
1989                         bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1990                                 (read_group - (write_group *
1991                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
1992                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1993                         stop = (bit_chk == 0);
1994                 }
1995                 sticky_bit_chk = sticky_bit_chk | bit_chk;
1996                 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1997
1998                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \
1999                            %u && %u\n", __func__, __LINE__, d,
2000                            sticky_bit_chk, param->read_correct_mask, stop);
2001
2002                 if (stop == 1) {
2003                         break;
2004                 } else {
2005                         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2006                                 if (bit_chk & 1) {
2007                                         /* Remember a passing test as
2008                                         the right_edge */
2009                                         right_edge[i] = d;
2010                                 } else {
2011                                         if (d != 0) {
2012                                                 /* If a right edge has not been
2013                                                 seen yet, then a future passing
2014                                                 test will mark this edge as the
2015                                                 left edge */
2016                                                 if (right_edge[i] ==
2017                                                 IO_IO_IN_DELAY_MAX + 1) {
2018                                                         left_edge[i] = -(d + 1);
2019                                                 }
2020                                         } else {
2021                                                 /* d = 0 failed, but it passed
2022                                                 when testing the left edge,
2023                                                 so it must be marginal,
2024                                                 set it to -1 */
2025                                                 if (right_edge[i] ==
2026                                                         IO_IO_IN_DELAY_MAX + 1 &&
2027                                                         left_edge[i] !=
2028                                                         IO_IO_IN_DELAY_MAX
2029                                                         + 1) {
2030                                                         right_edge[i] = -1;
2031                                                 }
2032                                                 /* If a right edge has not been
2033                                                 seen yet, then a future passing
2034                                                 test will mark this edge as the
2035                                                 left edge */
2036                                                 else if (right_edge[i] ==
2037                                                         IO_IO_IN_DELAY_MAX +
2038                                                         1) {
2039                                                         left_edge[i] = -(d + 1);
2040                                                 }
2041                                         }
2042                                 }
2043
2044                                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\
2045                                            d=%u]: ", __func__, __LINE__, d);
2046                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ",
2047                                            (int)(bit_chk & 1), i, left_edge[i]);
2048                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2049                                            right_edge[i]);
2050                                 bit_chk = bit_chk >> 1;
2051                         }
2052                 }
2053         }
2054
2055         /* Check that all bits have a window */
2056         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2057                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \
2058                            %d right_edge[%u]: %d\n", __func__, __LINE__,
2059                            i, left_edge[i], i, right_edge[i]);
2060                 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i]
2061                         == IO_IO_IN_DELAY_MAX + 1)) {
2062                         /*
2063                          * Restore delay chain settings before letting the loop
2064                          * in rw_mgr_mem_calibrate_vfifo to retry different
2065                          * dqs/ck relationships.
2066                          */
2067                         scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs);
2068                         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2069                                 scc_mgr_set_dqs_en_delay(read_group,
2070                                                          start_dqs_en);
2071                         }
2072                         scc_mgr_load_dqs(read_group);
2073                         writel(0, &sdr_scc_mgr->update);
2074
2075                         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \
2076                                    find edge [%u]: %d %d\n", __func__, __LINE__,
2077                                    i, left_edge[i], right_edge[i]);
2078                         if (use_read_test) {
2079                                 set_failing_group_stage(read_group *
2080                                         RW_MGR_MEM_DQ_PER_READ_DQS + i,
2081                                         CAL_STAGE_VFIFO,
2082                                         CAL_SUBSTAGE_VFIFO_CENTER);
2083                         } else {
2084                                 set_failing_group_stage(read_group *
2085                                         RW_MGR_MEM_DQ_PER_READ_DQS + i,
2086                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2087                                         CAL_SUBSTAGE_VFIFO_CENTER);
2088                         }
2089                         return 0;
2090                 }
2091         }
2092
2093         /* Find middle of window for each DQ bit */
2094         mid_min = left_edge[0] - right_edge[0];
2095         min_index = 0;
2096         for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2097                 mid = left_edge[i] - right_edge[i];
2098                 if (mid < mid_min) {
2099                         mid_min = mid;
2100                         min_index = i;
2101                 }
2102         }
2103
2104         /*
2105          * -mid_min/2 represents the amount that we need to move DQS.
2106          * If mid_min is odd and positive we'll need to add one to
2107          * make sure the rounding in further calculations is correct
2108          * (always bias to the right), so just add 1 for all positive values.
2109          */
2110         if (mid_min > 0)
2111                 mid_min++;
2112
2113         mid_min = mid_min / 2;
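             /*
              * Worked example: C division truncates toward zero, so the
              * increment above turns mid_min = 5 into 6 / 2 = 3 (a ceiling),
              * while a negative odd value such as -5 already yields -5 / 2 = -2.
              */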
2114
2115         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n",
2116                    __func__, __LINE__, mid_min, min_index);
2117
2118         /* Determine the amount we can change DQS (which is -mid_min) */
2119         orig_mid_min = mid_min;
2120         new_dqs = start_dqs - mid_min;
2121         if (new_dqs > IO_DQS_IN_DELAY_MAX)
2122                 new_dqs = IO_DQS_IN_DELAY_MAX;
2123         else if (new_dqs < 0)
2124                 new_dqs = 0;
2125
2126         mid_min = start_dqs - new_dqs;
2127         debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2128                    mid_min, new_dqs);
2129
2130         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2131                 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX)
2132                         mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX;
2133                 else if (start_dqs_en - mid_min < 0)
2134                         mid_min += start_dqs_en - mid_min;
2135         }
2136         new_dqs = start_dqs - mid_min;
2137
2138         debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \
2139                    new_dqs=%d mid_min=%d\n", start_dqs,
2140                    IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1,
2141                    new_dqs, mid_min);
2142
2143         /* Initialize data for export structures */
2144         dqs_margin = IO_IO_IN_DELAY_MAX + 1;
2145         dq_margin  = IO_IO_IN_DELAY_MAX + 1;
2146
2147         /* add delay to bring centre of all DQ windows to the same "level" */
2148         for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
2149                 /* Use values before divide by 2 to reduce round off error */
2150                 shift_dq = (left_edge[i] - right_edge[i] -
2151                         (left_edge[min_index] - right_edge[min_index]))/2  +
2152                         (orig_mid_min - mid_min);
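                     /*
                      * e.g. window widths of 4 and 1 give (4 - 1) / 2 = 1
                      * here, whereas halving each first would give
                      * 4 / 2 - 1 / 2 = 2, a larger rounding error.
                      */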
2153
2154                 debug_cond(DLEVEL == 2, "vfifo_center: before: \
2155                            shift_dq[%u]=%d\n", i, shift_dq);
2156
2157                 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET;
2158                 temp_dq_in_delay1 = readl(addr + (p << 2));
2159                 /* clamp the shift so the resulting DQ delay stays in range */
2160                 if (shift_dq + (int32_t)temp_dq_in_delay1 >
2161                         (int32_t)IO_IO_IN_DELAY_MAX) {
2162                         shift_dq = (int32_t)IO_IO_IN_DELAY_MAX -
2163                                    (int32_t)temp_dq_in_delay1;
2164                 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) {
2165                         shift_dq = -(int32_t)temp_dq_in_delay1;
2166                 }
2167                 debug_cond(DLEVEL == 2, "vfifo_center: after: \
2168                            shift_dq[%u]=%d\n", i, shift_dq);
2169                 final_dq[i] = temp_dq_in_delay1 + shift_dq;
2170                 scc_mgr_set_dq_in_delay(p, final_dq[i]);
2171                 scc_mgr_load_dq(p);
2172
2173                 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i,
2174                            left_edge[i] - shift_dq + (-mid_min),
2175                            right_edge[i] + shift_dq - (-mid_min));
2176                 /* To determine values for export structures */
2177                 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2178                         dq_margin = left_edge[i] - shift_dq + (-mid_min);
2179
2180                 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2181                         dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2182         }
2183
2184         final_dqs = new_dqs;
2185         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2186                 final_dqs_en = start_dqs_en - mid_min;
2187
2188         /* Move DQS-en */
2189         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2190                 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en);
2191                 scc_mgr_load_dqs(read_group);
2192         }
2193
2194         /* Move DQS */
2195         scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs);
2196         scc_mgr_load_dqs(read_group);
2197         debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \
2198                    dqs_margin=%d\n", __func__, __LINE__,
2199                    dq_margin, dqs_margin);
2200
2201         /*
2202          * Do not remove this line as it makes sure all of our decisions
2203          * have been applied. Apply the update bit.
2204          */
2205         writel(0, &sdr_scc_mgr->update);
2206
2207         return (dq_margin >= 0) && (dqs_margin >= 0);
2208 }
2209
2210 /*
2211  * calibrate the read valid prediction FIFO.
2212  *
2213  *  - read valid prediction will consist of finding a good DQS enable phase,
2214  * DQS enable delay, DQS input phase, and DQS input delay.
2215  *  - we also do a per-bit deskew on the DQ lines.
2216  */
2217 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
2218                                            uint32_t test_bgn)
2219 {
2220         uint32_t p, d, rank_bgn, sr;
2221         uint32_t dtaps_per_ptap;
2222         uint32_t tmp_delay;
2223         uint32_t bit_chk;
2224         uint32_t grp_calibrated;
2225         uint32_t write_group, write_test_bgn;
2226         uint32_t failed_substage;
2227
2228         debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn);
2229
2230         /* update info for sims */
2231         reg_file_set_stage(CAL_STAGE_VFIFO);
2232
2233         write_group = read_group;
2234         write_test_bgn = test_bgn;
2235
2236         /* USER Determine number of delay taps for each phase tap */
2237         dtaps_per_ptap = 0;
2238         tmp_delay = 0;
2239         while (tmp_delay < IO_DELAY_PER_OPA_TAP) {
2240                 dtaps_per_ptap++;
2241                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
2242         }
2243         dtaps_per_ptap--;
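             /*
              * i.e. dtaps_per_ptap is now the largest n such that
              * n * IO_DELAY_PER_DQS_EN_DCHAIN_TAP < IO_DELAY_PER_OPA_TAP.
              */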
2244         tmp_delay = 0;
2245
2246         /* update info for sims */
2247         reg_file_set_group(read_group);
2248
2249         grp_calibrated = 0;
2250
2251         reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2252         failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2253
2254         for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) {
2255                 /*
2256                  * In RLDRAMX we may be messing with the delay of pins in
2257                  * the same write group but outside of the current read
2258                  * group, but that's OK because we haven't calibrated
2259                  * the output side yet.
2260                  */
2261                 if (d > 0) {
2262                         scc_mgr_apply_group_all_out_delay_add_all_ranks
2263                         (write_group, write_test_bgn, d);
2264                 }
2265
2266                 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0;
2267                         p++) {
2268                         /* set a particular dqdqs phase */
2269                         scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p);
2270
2271                         debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u \
2272                                    p=%u d=%u\n", __func__, __LINE__,
2273                                    read_group, p, d);
2274
2275                         /*
2276                          * Load up the patterns used by read calibration
2277                          * using current DQDQS phase.
2278                          */
2279                         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2280                         if (!(gbl->phy_debug_mode_flags &
2281                                 PHY_DEBUG_DISABLE_GUARANTEED_READ)) {
2282                                 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks
2283                                     (read_group, 1, &bit_chk)) {
2284                                         debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:",
2285                                                    __func__, __LINE__);
2286                                         debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n",
2287                                                    read_group, p, d);
2288                                         break;
2289                                 }
2290                         }
2291
2292                         /* case:56390 */
2293                         grp_calibrated = 1;
2294                         if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
2295                             (write_group, read_group, test_bgn)) {
2296                                 /*
2297                                  * USER Read per-bit deskew can be done on a
2298                                  * per shadow register basis.
2299                                  */
2300                                 for (rank_bgn = 0, sr = 0;
2301                                      rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2302                                      rank_bgn += NUM_RANKS_PER_SHADOW_REG,
2303                                      ++sr) {
2304                                         /*
2305                                          * Determine if this set of ranks
2306                                          * should be skipped entirely.
2307                                          */
2308                                         if (!param->skip_shadow_regs[sr]) {
2309                                                 /*
2310                                                  * If doing read after write
2311                                                  * calibration, do not update
2312                                                  * FOM now - do it then.
2313                                                  */
2314                                                 if (!rw_mgr_mem_calibrate_vfifo_center
2315                                                     (rank_bgn, write_group,
2316                                                      read_group, test_bgn, 1, 0)) {
2317                                                         grp_calibrated = 0;
2318                                                         failed_substage =
2319                                                         CAL_SUBSTAGE_VFIFO_CENTER;
2320                                                 }
2321                                         }
2322                                 }
2323                         } else {
2324                                 grp_calibrated = 0;
2325                                 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2326                         }
2327                 }
2328         }
2329
2330         if (grp_calibrated == 0) {
2331                 set_failing_group_stage(write_group, CAL_STAGE_VFIFO,
2332                                         failed_substage);
2333                 return 0;
2334         }
2335
2336         /*
2337          * Reset the delay chains back to zero if they have moved > 1
2338          * (check for d > 2 because the loop increments d by 2 even when
2339          * the first, zero-delay iteration passes).
2340          */
2341         if (d > 2)
2342                 scc_mgr_zero_group(write_group, write_test_bgn, 1);
2343
2344         return 1;
2345 }
2346
2347 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */
2348 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group,
2349                                                uint32_t test_bgn)
2350 {
2351         uint32_t rank_bgn, sr;
2352         uint32_t grp_calibrated;
2353         uint32_t write_group;
2354
2355         debug("%s:%d %u %u\n", __func__, __LINE__, read_group, test_bgn);
2356
2357         /* update info for sims */
2358
2359         reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2360         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2361
2362         write_group = read_group;
2363
2364         /* update info for sims */
2365         reg_file_set_group(read_group);
2366
2367         grp_calibrated = 1;
2368         /* Read per-bit deskew can be done on a per shadow register basis */
2369         for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2370                 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) {
2371                 /* Determine if this set of ranks should be skipped entirely */
2372                 if (!param->skip_shadow_regs[sr]) {
2373                         /* This is the last calibration round, update FOM here */
2374                         if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn,
2375                                                                 write_group,
2376                                                                 read_group,
2377                                                                 test_bgn, 0,
2378                                                                 1)) {
2379                                 grp_calibrated = 0;
2380                         }
2381                 }
2382         }
2383
2385         if (grp_calibrated == 0) {
2386                 set_failing_group_stage(write_group,
2387                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2388                                         CAL_SUBSTAGE_VFIFO_CENTER);
2389                 return 0;
2390         }
2391
2392         return 1;
2393 }
2394
2395 /* Calibrate LFIFO to find smallest read latency */
2396 static uint32_t rw_mgr_mem_calibrate_lfifo(void)
2397 {
2398         uint32_t found_one;
2399         uint32_t bit_chk;
2400
2401         debug("%s:%d\n", __func__, __LINE__);
2402
2403         /* update info for sims */
2404         reg_file_set_stage(CAL_STAGE_LFIFO);
2405         reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2406
2407         /* Load up the patterns used by read calibration for all ranks */
2408         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2409         found_one = 0;
2410
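             /*
              * Walk the read latency down from its current safe value until
              * a read test fails, then back off and add two cycles of guard
              * band (the fudge factor below).
              */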
2411         do {
2412                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2413                 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u\n",
2414                            __func__, __LINE__, gbl->curr_read_lat);
2415
2416                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
2417                                                               NUM_READ_TESTS,
2418                                                               PASS_ALL_BITS,
2419                                                               &bit_chk, 1)) {
2420                         break;
2421                 }
2422
2423                 found_one = 1;
2424                 /* reduce read latency and see if things still work correctly */
2426                 gbl->curr_read_lat--;
2427         } while (gbl->curr_read_lat > 0);
2428
2429         /* reset the fifos to get pointers to known state */
2430
2431         writel(0, &phy_mgr_cmd->fifo_reset);
2432
2433         if (found_one) {
2434                 /* add a fudge factor to the read latency that was determined */
2435                 gbl->curr_read_lat += 2;
2436                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2437                 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \
2438                            read_lat=%u\n", __func__, __LINE__,
2439                            gbl->curr_read_lat);
2440                 return 1;
2441         } else {
2442                 set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2443                                         CAL_SUBSTAGE_READ_LATENCY);
2444
2445                 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \
2446                            read_lat=%u\n", __func__, __LINE__,
2447                            gbl->curr_read_lat);
2448                 return 0;
2449         }
2450 }
2451
2452 /*
2453  * Issue a write test command.
2454  * Two variants are provided: one that just tests a write pattern and
2455  * another that tests datamask functionality.
2456  */
2457 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group,
2458                                                   uint32_t test_dm)
2459 {
2460         uint32_t mcc_instruction;
2461         uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) &&
2462                 ENABLE_SUPER_QUICK_CALIBRATION);
2463         uint32_t rw_wl_nop_cycles;
2464         uint32_t addr;
2465
2466         /*
2467          * Set counter and jump addresses for the right
2468          * number of NOP cycles.
2469          * The number of supported NOP cycles can range from -1 to infinity
2470          * Three different cases are handled:
2471          *
2472          * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
2473          *    mechanism will be used to insert the right number of NOPs
2474          *
2475          * 2. For a number of NOP cycles equal to 0, the micro-instruction
2476          *    issuing the write command will jump straight to the
2477          *    micro-instruction that turns on DQS (for DDRx), or outputs write
2478          *    data (for RLD), skipping the NOP micro-instruction
2479          *    altogether
2480          *
2481          * 3. A number of NOP cycles equal to -1 indicates that DQS must be
2482          *    turned on in the same micro-instruction that issues the write
2483          *    command. Then we need
2484          *    to directly jump to the micro-instruction that sends out the data
2485          *
2486          * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
2487          *       (2 and 3). One jump-counter (0) is used to perform multiple
2488          *       write-read operations.
2489          *       One counter is left to issue this command in "multiple-group" mode.
2490          */
2491
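             /*
              * Summary of the counter setup below (rw_wl_nop_cycles is
              * unsigned, so a stored -1 is compared as UINT32_MAX):
              *   == -1: CNTR2 = 0xFF, jump straight to the micro-instruction
              *          that turns on DQS / sends out the data;
              *   ==  0: CNTR2 = 0xFF, jump to the DQS enable
              *          micro-instruction, skipping the NOP;
              *    >  0: CNTR2 = 0 (fall through), CNTR3 = rw_wl_nop_cycles - 1
              *          to loop the NOP micro-instruction.
              */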
2492         rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;
2493
2494         if (rw_wl_nop_cycles == -1) {
2495                 /*
2496                  * CNTR 2 - We want to execute the special write operation that
2497                  * turns on DQS right away and then skip directly to the
2498                  * instruction that sends out the data. We set the counter to a
2499                  * large number so that the jump is always taken.
2500                  */
2501                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2502
2503                 /* CNTR 3 - Not used */
2504                 if (test_dm) {
2505                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1;
2506                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA,
2507                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2508                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2509                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2510                 } else {
2511                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1;
2512                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA,
2513                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2514                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2515                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2516                 }
2517         } else if (rw_wl_nop_cycles == 0) {
2518                 /*
2519                  * CNTR 2 - We want to skip the NOP operation and go straight
2520                  * to the DQS enable instruction. We set the counter to a large
2521                  * number so that the jump is always taken.
2522                  */
2523                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2524
2525                 /* CNTR 3 - Not used */
2526                 if (test_dm) {
2527                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2528                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS,
2529                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2530                 } else {
2531                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2532                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS,
2533                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2534                 }
2535         } else {
2536                 /*
2537                  * CNTR 2 - In this case we want to execute the next instruction
2538                  * and NOT take the jump. So we set the counter to 0. The jump
2539                  * address doesn't count.
2540                  */
2541                 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
2542                 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2543
2544                 /*
2545                  * CNTR 3 - Set the nop counter to the number of cycles we
2546                  * need to loop for, minus 1.
2547                  */
2548                 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
2549                 if (test_dm) {
2550                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2551                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2552                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2553                 } else {
2554                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2555                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2556                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2557                 }
2558         }
2559
2560         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
2561                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
2562
2563         if (quick_write_mode)
2564                 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
2565         else
2566                 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
2567
2568         writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
2569
2570         /*
2571          * CNTR 1 - This is used to ensure enough time elapses
2572          * for read data to come back.
2573          */
2574         writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
2575
2576         if (test_dm) {
2577                 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT,
2578                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2579         } else {
2580                 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT,
2581                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2582         }
2583
2584         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
2585         writel(mcc_instruction, addr + (group << 2));
2586 }
2587
/* Test writes; can check for a single-bit pass or a multiple-bit pass */
2589 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
2590         uint32_t write_group, uint32_t use_dm, uint32_t all_correct,
2591         uint32_t *bit_chk, uint32_t all_ranks)
2592 {
2593         uint32_t r;
2594         uint32_t correct_mask_vg;
2595         uint32_t tmp_bit_chk;
2596         uint32_t vg;
2597         uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
2598                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
2599         uint32_t addr_rw_mgr;
2600         uint32_t base_rw_mgr;
2601
2602         *bit_chk = param->write_correct_mask;
2603         correct_mask_vg = param->write_correct_mask_vg;
2604
2605         for (r = rank_bgn; r < rank_end; r++) {
2606                 if (param->skip_ranks[r]) {
2607                         /* request to skip the rank */
2608                         continue;
2609                 }
2610
2611                 /* set rank */
2612                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
2613
2614                 tmp_bit_chk = 0;
2615                 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS;
2616                 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) {
2617                         /* reset the fifos to get pointers to known state */
2618                         writel(0, &phy_mgr_cmd->fifo_reset);
2619
2620                         tmp_bit_chk = tmp_bit_chk <<
2621                                 (RW_MGR_MEM_DQ_PER_WRITE_DQS /
2622                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
2623                         rw_mgr_mem_calibrate_write_test_issue(write_group *
2624                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg,
2625                                 use_dm);
2626
2627                         base_rw_mgr = readl(addr_rw_mgr);
2628                         tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));
2629                         if (vg == 0)
2630                                 break;
2631                 }
2632                 *bit_chk &= tmp_bit_chk;
2633         }
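        /*
         * At this point, *bit_chk has a 1 for every DQ bit that passed on
         * every tested rank. As a hypothetical illustration: with
         * RW_MGR_MEM_DQ_PER_WRITE_DQS == 8 and two virtual groups per
         * write DQS, each pass of the loop above shifts the accumulator
         * left by 8 / 2 = 4 bits and ORs in the pass bits of the next
         * virtual group.
         */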
2634
2635         if (all_correct) {
2636                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
2637                 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \
2638                            %u => %lu", write_group, use_dm,
2639                            *bit_chk, param->write_correct_mask,
2640                            (long unsigned int)(*bit_chk ==
2641                            param->write_correct_mask));
2642                 return *bit_chk == param->write_correct_mask;
2643         } else {
2644                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
2645                 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ",
2646                        write_group, use_dm, *bit_chk);
2647                 debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0,
2648                         (long unsigned int)(*bit_chk != 0));
2649                 return *bit_chk != 0x00;
2650         }
2651 }
2652
/*
 * Center all windows. Do per-bit deskew to possibly increase the size
 * of certain windows.
 */
2657 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
2658         uint32_t write_group, uint32_t test_bgn)
2659 {
2660         uint32_t i, p, min_index;
2661         int32_t d;
        uint32_t bit_chk;
        uint32_t sticky_bit_chk;
        /*
         * Store these as signed, since there are comparisons with
         * signed numbers.
         */
        int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
        int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2670         int32_t mid;
2671         int32_t mid_min, orig_mid_min;
2672         int32_t new_dqs, start_dqs, shift_dq;
2673         int32_t dq_margin, dqs_margin, dm_margin;
2674         uint32_t stop;
2675         uint32_t temp_dq_out1_delay;
2676         uint32_t addr;
2677
2678         debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn);
2679
2680         dm_margin = 0;
2681
2682         addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2683         start_dqs = readl(addr +
2684                           (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
2685
2686         /* per-bit deskew */
2687
2688         /*
2689          * set the left and right edge of each bit to an illegal value
2690          * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
2691          */
2692         sticky_bit_chk = 0;
2693         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2694                 left_edge[i]  = IO_IO_OUT1_DELAY_MAX + 1;
2695                 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2696         }
2697
2698         /* Search for the left edge of the window for each bit */
2699         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) {
2700                 scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, d);
2701
2702                 writel(0, &sdr_scc_mgr->update);
2703
                /*
                 * Stop searching when the write test doesn't pass AND when
                 * we've seen a passing write on every bit.
                 */
2708                 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2709                         0, PASS_ONE_BIT, &bit_chk, 0);
2710                 sticky_bit_chk = sticky_bit_chk | bit_chk;
2711                 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2712                 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \
2713                            == %u && %u [bit_chk= %u ]\n",
2714                         d, sticky_bit_chk, param->write_correct_mask,
2715                         stop, bit_chk);
2716
2717                 if (stop == 1) {
2718                         break;
2719                 } else {
2720                         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2721                                 if (bit_chk & 1) {
2722                                         /*
2723                                          * Remember a passing test as the
2724                                          * left_edge.
2725                                          */
2726                                         left_edge[i] = d;
2727                                 } else {
2728                                         /*
2729                                          * If a left edge has not been seen
2730                                          * yet, then a future passing test will
2731                                          * mark this edge as the right edge.
2732                                          */
2733                                         if (left_edge[i] ==
2734                                                 IO_IO_OUT1_DELAY_MAX + 1) {
2735                                                 right_edge[i] = -(d + 1);
2736                                         }
2737                                 }
                                debug_cond(DLEVEL == 2, "write_center(l,d=%d):", d);
2739                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2740                                            (int)(bit_chk & 1), i, left_edge[i]);
2741                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2742                                        right_edge[i]);
2743                                 bit_chk = bit_chk >> 1;
2744                         }
2745                 }
2746         }
2747
2748         /* Reset DQ delay chains to 0 */
2749         scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, 0);
2750         sticky_bit_chk = 0;
2751         for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) {
2752                 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
2753                            %d right_edge[%u]: %d\n", __func__, __LINE__,
2754                            i, left_edge[i], i, right_edge[i]);
2755
                /*
                 * Check for cases where we haven't found the left edge,
                 * which makes our assignment of the right edge invalid.
                 * Reset it to the illegal value.
                 */
2761                 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) &&
2762                     (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) {
2763                         right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2764                         debug_cond(DLEVEL == 2, "%s:%d write_center: reset \
2765                                    right_edge[%u]: %d\n", __func__, __LINE__,
2766                                    i, right_edge[i]);
2767                 }
2768
2769                 /*
2770                  * Reset sticky bit (except for bits where we have
2771                  * seen the left edge).
2772                  */
2773                 sticky_bit_chk = sticky_bit_chk << 1;
2774                 if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1))
2775                         sticky_bit_chk = sticky_bit_chk | 1;
2776
2777                 if (i == 0)
2778                         break;
2779         }
2780
2781         /* Search for the right edge of the window for each bit */
2782         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) {
2783                 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2784                                                         d + start_dqs);
2785
2786                 writel(0, &sdr_scc_mgr->update);
2787
                /*
                 * Stop searching when the write test doesn't pass AND when
                 * we've seen a passing write on every bit.
                 */
2792                 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2793                         0, PASS_ONE_BIT, &bit_chk, 0);
2794
2795                 sticky_bit_chk = sticky_bit_chk | bit_chk;
2796                 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2797
2798                 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \
2799                            %u && %u\n", d, sticky_bit_chk,
2800                            param->write_correct_mask, stop);
2801
2802                 if (stop == 1) {
2803                         if (d == 0) {
2804                                 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS;
2805                                         i++) {
                                        /*
                                         * d = 0 failed, but it passed when
                                         * testing the left edge, so it must
                                         * be marginal; set it to -1.
                                         */
2809                                         if (right_edge[i] ==
2810                                                 IO_IO_OUT1_DELAY_MAX + 1 &&
2811                                                 left_edge[i] !=
2812                                                 IO_IO_OUT1_DELAY_MAX + 1) {
2813                                                 right_edge[i] = -1;
2814                                         }
2815                                 }
2816                         }
2817                         break;
2818                 } else {
2819                         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2820                                 if (bit_chk & 1) {
2821                                         /*
2822                                          * Remember a passing test as
2823                                          * the right_edge.
2824                                          */
2825                                         right_edge[i] = d;
2826                                 } else {
2827                                         if (d != 0) {
2828                                                 /*
2829                                                  * If a right edge has not
2830                                                  * been seen yet, then a future
2831                                                  * passing test will mark this
2832                                                  * edge as the left edge.
2833                                                  */
2834                                                 if (right_edge[i] ==
2835                                                     IO_IO_OUT1_DELAY_MAX + 1)
2836                                                         left_edge[i] = -(d + 1);
2837                                         } else {
2838                                                 /*
2839                                                  * d = 0 failed, but it passed
2840                                                  * when testing the left edge,
2841                                                  * so it must be marginal, set
2842                                                  * it to -1.
2843                                                  */
2844                                                 if (right_edge[i] ==
2845                                                     IO_IO_OUT1_DELAY_MAX + 1 &&
2846                                                     left_edge[i] !=
2847                                                     IO_IO_OUT1_DELAY_MAX + 1)
2848                                                         right_edge[i] = -1;
2849                                                 /*
2850                                                  * If a right edge has not been
2851                                                  * seen yet, then a future
2852                                                  * passing test will mark this
2853                                                  * edge as the left edge.
2854                                                  */
2855                                                 else if (right_edge[i] ==
2856                                                         IO_IO_OUT1_DELAY_MAX +
2857                                                         1)
2858                                                         left_edge[i] = -(d + 1);
2859                                         }
2860                                 }
                                debug_cond(DLEVEL == 2, "write_center(r,d=%d):", d);
2862                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2863                                            (int)(bit_chk & 1), i, left_edge[i]);
2864                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2865                                            right_edge[i]);
2866                                 bit_chk = bit_chk >> 1;
2867                         }
2868                 }
2869         }
2870
2871         /* Check that all bits have a window */
2872         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2873                 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
2874                            %d right_edge[%u]: %d", __func__, __LINE__,
2875                            i, left_edge[i], i, right_edge[i]);
2876                 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) ||
2877                     (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) {
2878                         set_failing_group_stage(test_bgn + i,
2879                                                 CAL_STAGE_WRITES,
2880                                                 CAL_SUBSTAGE_WRITES_CENTER);
2881                         return 0;
2882                 }
2883         }
2884
2885         /* Find middle of window for each DQ bit */
2886         mid_min = left_edge[0] - right_edge[0];
2887         min_index = 0;
2888         for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2889                 mid = left_edge[i] - right_edge[i];
2890                 if (mid < mid_min) {
2891                         mid_min = mid;
2892                         min_index = i;
2893                 }
2894         }
2895
2896         /*
2897          * -mid_min/2 represents the amount that we need to move DQS.
2898          * If mid_min is odd and positive we'll need to add one to
2899          * make sure the rounding in further calculations is correct
2900          * (always bias to the right), so just add 1 for all positive values.
2901          */
2902         if (mid_min > 0)
2903                 mid_min++;
2904         mid_min = mid_min / 2;
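        /*
         * Worked example with hypothetical numbers: if the narrowest
         * window has left_edge 9 and right_edge 4, mid_min starts at
         * 9 - 4 = 5; being odd and positive it is bumped to 6, then
         * halved to 3, so the rounding biases the DQS move to the right.
         */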
2905         debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__,
2906                    __LINE__, mid_min);
2907
2908         /* Determine the amount we can change DQS (which is -mid_min) */
2909         orig_mid_min = mid_min;
2910         new_dqs = start_dqs;
2911         mid_min = 0;
2912         debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \
2913                    mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min);
2914         /* Initialize data for export structures */
2915         dqs_margin = IO_IO_OUT1_DELAY_MAX + 1;
2916         dq_margin  = IO_IO_OUT1_DELAY_MAX + 1;
2917
        /* Add delay to bring the center of all DQ windows to the same "level" */
2919         for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
                /* Use values before divide by 2 to reduce round-off error */
                shift_dq = (left_edge[i] - right_edge[i] -
                        (left_edge[min_index] - right_edge[min_index])) / 2 +
                        (orig_mid_min - mid_min);
2924
2925                 debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \
2926                            [%u]=%d\n", __func__, __LINE__, i, shift_dq);
2927
2928                 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2929                 temp_dq_out1_delay = readl(addr + (i << 2));
2930                 if (shift_dq + (int32_t)temp_dq_out1_delay >
2931                         (int32_t)IO_IO_OUT1_DELAY_MAX) {
2932                         shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay;
2933                 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) {
2934                         shift_dq = -(int32_t)temp_dq_out1_delay;
2935                 }
2936                 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n",
2937                            i, shift_dq);
2938                 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq);
2939                 scc_mgr_load_dq(i);
2940
2941                 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i,
2942                            left_edge[i] - shift_dq + (-mid_min),
2943                            right_edge[i] + shift_dq - (-mid_min));
2944                 /* To determine values for export structures */
2945                 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2946                         dq_margin = left_edge[i] - shift_dq + (-mid_min);
2947
2948                 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2949                         dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2950         }
2951
2952         /* Move DQS */
2953         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
2954         writel(0, &sdr_scc_mgr->update);
2955
        /* Center DM */
        debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);

        /*
         * Set the left and right edge of each bit to an illegal value;
         * use (IO_IO_OUT1_DELAY_MAX + 1) as the illegal value.
         */
2963         left_edge[0]  = IO_IO_OUT1_DELAY_MAX + 1;
2964         right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1;
2965         int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2966         int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2967         int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1;
2968         int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1;
2969         int32_t win_best = 0;
2970
        /* Search for the window (or part of it) reachable with DM delay shifts */
2972         for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) {
2973                 scc_mgr_apply_group_dm_out1_delay(write_group, d);
2974                 writel(0, &sdr_scc_mgr->update);
2975
2976                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2977                                                     PASS_ALL_BITS, &bit_chk,
2978                                                     0)) {
                        /* Set the current end of the window */
2980                         end_curr = -d;
                        /*
                         * If a beginning edge of our window has not been
                         * seen, this is our current beginning of the DM
                         * window.
                         */
2985                         if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
2986                                 bgn_curr = -d;
2987
                        /*
                         * If the current window is bigger than the best
                         * seen, set the best seen to the current window.
                         */
2992                         if ((end_curr-bgn_curr+1) > win_best) {
2993                                 win_best = end_curr-bgn_curr+1;
2994                                 bgn_best = bgn_curr;
2995                                 end_best = end_curr;
2996                         }
                } else {
                        /* We just saw a failing test. Reset temp edge. */
                        bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
                        end_curr = IO_IO_OUT1_DELAY_MAX + 1;
                }
        }

3005         /* Reset DM delay chains to 0 */
3006         scc_mgr_apply_group_dm_out1_delay(write_group, 0);
3007
        /*
         * Check to see if the current window nudges up against 0 delay.
         * If so, we need to continue the search by shifting DQS;
         * otherwise the DQS search begins as a new search.
         */
3012         if (end_curr != 0) {
3013                 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3014                 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3015         }
3016
        /* Search for the window (or part of it) reachable with DQS shifts */
3018         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) {
                /*
                 * Note: This only shifts DQS, so we may be unnecessarily
                 * limiting ourselves to the width of DQ.
                 */
3023                 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
3024                                                         d + new_dqs);
3025
3026                 writel(0, &sdr_scc_mgr->update);
3027                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
3028                                                     PASS_ALL_BITS, &bit_chk,
3029                                                     0)) {
                        /* Set the current end of the window */
3031                         end_curr = d;
                        /*
                         * If a beginning edge of our window has not been
                         * seen, this is our current beginning of the DM
                         * window.
                         */
3036                         if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
3037                                 bgn_curr = d;
3038
                        /*
                         * If the current window is bigger than the best
                         * seen, set the best seen to the current window.
                         */
3043                         if ((end_curr-bgn_curr+1) > win_best) {
3044                                 win_best = end_curr-bgn_curr+1;
3045                                 bgn_best = bgn_curr;
3046                                 end_best = end_curr;
3047                         }
                } else {
                        /* We just saw a failing test. Reset temp edge. */
                        bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
                        end_curr = IO_IO_OUT1_DELAY_MAX + 1;

                        /*
                         * Early exit optimization: if the remaining delay
                         * chain space is less than the largest window
                         * already seen, we can exit.
                         */
                        if ((win_best - 1) >
                            (IO_IO_OUT1_DELAY_MAX - new_dqs - d))
                                break;
                }
        }
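        /*
         * Numeric sketch of the early exit above (hypothetical values):
         * with IO_IO_OUT1_DELAY_MAX == 31, new_dqs == 10 and win_best ==
         * 20, a failure at d == 3 leaves only 31 - 10 - 3 = 18 taps of
         * search space, which can no longer beat the 20-tap window
         * already found, so the loop terminates.
         */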
3062
        /* Assign left and right edge for calibration and reporting */
3064         left_edge[0] = -1*bgn_best;
3065         right_edge[0] = end_best;
3066
3067         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__,
3068                    __LINE__, left_edge[0], right_edge[0]);
3069
3070         /* Move DQS (back to orig) */
3071         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3072
3073         /* Move DM */
3074
3075         /* Find middle of window for the DM bit */
3076         mid = (left_edge[0] - right_edge[0]) / 2;
3077
3078         /* only move right, since we are not moving DQS/DQ */
3079         if (mid < 0)
3080                 mid = 0;
3081
        /* dm_margin should fail if we never find a window */
3083         if (win_best == 0)
3084                 dm_margin = -1;
3085         else
3086                 dm_margin = left_edge[0] - mid;
3087
3088         scc_mgr_apply_group_dm_out1_delay(write_group, mid);
3089         writel(0, &sdr_scc_mgr->update);
3090
3091         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \
3092                    dm_margin=%d\n", __func__, __LINE__, left_edge[0],
3093                    right_edge[0], mid, dm_margin);
3094         /* Export values */
3095         gbl->fom_out += dq_margin + dqs_margin;
3096
3097         debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \
3098                    dqs_margin=%d dm_margin=%d\n", __func__, __LINE__,
3099                    dq_margin, dqs_margin, dm_margin);
3100
3101         /*
3102          * Do not remove this line as it makes sure all of our
3103          * decisions have been applied.
3104          */
3105         writel(0, &sdr_scc_mgr->update);
3106         return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0);
3107 }
3108
3109 /* calibrate the write operations */
3110 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g,
3111         uint32_t test_bgn)
3112 {
3113         /* update info for sims */
3114         debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn);
3115
3116         reg_file_set_stage(CAL_STAGE_WRITES);
3117         reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3118
3119         reg_file_set_group(g);
3120
3121         if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) {
3122                 set_failing_group_stage(g, CAL_STAGE_WRITES,
3123                                         CAL_SUBSTAGE_WRITES_CENTER);
3124                 return 0;
3125         }
3126
3127         return 1;
3128 }
3129
3130 /* precharge all banks and activate row 0 in bank "000..." and bank "111..." */
3131 static void mem_precharge_and_activate(void)
3132 {
3133         uint32_t r;
3134
3135         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
3136                 if (param->skip_ranks[r]) {
3137                         /* request to skip the rank */
3138                         continue;
3139                 }
3140
3141                 /* set rank */
3142                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3143
3144                 /* precharge all banks ... */
3145                 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3146                                              RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3147
3148                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3149                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1,
3150                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3151
3152                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3153                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2,
3154                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3155
3156                 /* activate rows */
3157                 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3158                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3159         }
3160 }
3161
3162 /* Configure various memory related parameters. */
3163 static void mem_config(void)
3164 {
3165         uint32_t rlat, wlat;
3166         uint32_t rw_wl_nop_cycles;
3167         uint32_t max_latency;
3168
3169         debug("%s:%d\n", __func__, __LINE__);
3170         /* read in write and read latency */
3171         wlat = readl(&data_mgr->t_wl_add);
3172         wlat += readl(&data_mgr->mem_t_add);
3173
3174         /* WL for hard phy does not include additive latency */
3175
        /*
         * Add additional write latency to offset the address/command extra
         * clock cycle. We change the AC mux setting, causing AC to be
         * delayed by one mem clock cycle. Only do this for DDR3.
         */
3181         wlat = wlat + 1;
3182
3183         rlat = readl(&data_mgr->t_rl_add);
3184
3185         rw_wl_nop_cycles = wlat - 2;
3186         gbl->rw_wl_nop_cycles = rw_wl_nop_cycles;
3187
3188         /*
3189          * For AV/CV, lfifo is hardened and always runs at full rate so
3190          * max latency in AFI clocks, used here, is correspondingly smaller.
3191          */
3192         max_latency = (1<<MAX_LATENCY_COUNT_WIDTH)/1 - 1;
3193         /* configure for a burst length of 8 */
3194
3195         /* write latency */
3196         /* Adjust Write Latency for Hard PHY */
3197         wlat = wlat + 1;
3198
3199         /* set a pretty high read latency initially */
3200         gbl->curr_read_lat = rlat + 16;
3201
3202         if (gbl->curr_read_lat > max_latency)
3203                 gbl->curr_read_lat = max_latency;
3204
3205         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3206
3207         /* advertise write latency */
3208         gbl->curr_write_lat = wlat;
3209         writel(wlat - 2, &phy_mgr_cfg->afi_wlat);
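        /*
         * Hypothetical walk-through of the latency bookkeeping above: if
         * t_wl_add + mem_t_add reads back as 5, the AC-mux adjustment
         * makes wlat 6 and rw_wl_nop_cycles = 6 - 2 = 4; the hard-PHY
         * adjustment then makes wlat 7, so 7 is advertised as
         * curr_write_lat and 7 - 2 = 5 is written to afi_wlat.
         */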
3210
3211         /* initialize bit slips */
3212         mem_precharge_and_activate();
3213 }
3214
3215 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */
3216 static void mem_skip_calibrate(void)
3217 {
3218         uint32_t vfifo_offset;
3219         uint32_t i, j, r;
3220
3221         debug("%s:%d\n", __func__, __LINE__);
3222         /* Need to update every shadow register set used by the interface */
3223         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
3224                 r += NUM_RANKS_PER_SHADOW_REG) {
3225                 /*
3226                  * Set output phase alignment settings appropriate for
3227                  * skip calibration.
3228                  */
3229                 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3230                         scc_mgr_set_dqs_en_phase(i, 0);
3231 #if IO_DLL_CHAIN_LENGTH == 6
3232                         scc_mgr_set_dqdqs_output_phase(i, 6);
3233 #else
3234                         scc_mgr_set_dqdqs_output_phase(i, 7);
3235 #endif
3236                         /*
3237                          * Case:33398
3238                          *
3239                          * Write data arrives to the I/O two cycles before write
3240                          * latency is reached (720 deg).
3241                          *   -> due to bit-slip in a/c bus
3242                          *   -> to allow board skew where dqs is longer than ck
3243                          *      -> how often can this happen!?
3244                          *      -> can claim back some ptaps for high freq
3245                          *       support if we can relax this, but i digress...
3246                          *
3247                          * The write_clk leads mem_ck by 90 deg
3248                          * The minimum ptap of the OPA is 180 deg
                         * Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of delay
3250                          * The write_clk is always delayed by 2 ptaps
3251                          *
3252                          * Hence, to make DQS aligned to CK, we need to delay
3253                          * DQS by:
3254                          *    (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH))
3255                          *
3256                          * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH)
                         * gives us the number of ptaps, which simplifies to:
3258                          *
3259                          *    (1.25 * IO_DLL_CHAIN_LENGTH - 2)
3260                          */
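                        /*
                         * For example, IO_DLL_CHAIN_LENGTH == 6 gives
                         * 1.25 * 6 - 2 = 5.5, which truncates to 5 ptaps
                         * when passed as an integer argument, while a
                         * chain length of 8 gives exactly 8 ptaps. Note
                         * that this call overrides the #if-selected phase
                         * written just above.
                         */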
3261                         scc_mgr_set_dqdqs_output_phase(i, (1.25 *
3262                                 IO_DLL_CHAIN_LENGTH - 2));
3263                 }
3264                 writel(0xff, &sdr_scc_mgr->dqs_ena);
3265                 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3266
3267                 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
3268                         writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3269                                   SCC_MGR_GROUP_COUNTER_OFFSET);
3270                 }
3271                 writel(0xff, &sdr_scc_mgr->dq_ena);
3272                 writel(0xff, &sdr_scc_mgr->dm_ena);
3273                 writel(0, &sdr_scc_mgr->update);
3274         }
3275
3276         /* Compensate for simulation model behaviour */
3277         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3278                 scc_mgr_set_dqs_bus_in_delay(i, 10);
3279                 scc_mgr_load_dqs(i);
3280         }
3281         writel(0, &sdr_scc_mgr->update);
3282
        /*
         * ArriaV has hard FIFOs that can only be initialized by
         * incrementing them in the sequencer.
         */
3287         vfifo_offset = CALIB_VFIFO_OFFSET;
3288         for (j = 0; j < vfifo_offset; j++) {
3289                 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3290         }
3291         writel(0, &phy_mgr_cmd->fifo_reset);
3292
        /*
         * For ACV with a hard lfifo, we get the skip-cal setting from a
         * generation-time constant.
         */
3297         gbl->curr_read_lat = CALIB_LFIFO_OFFSET;
3298         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3299 }
3300
3301 /* Memory calibration entry point */
3302 static uint32_t mem_calibrate(void)
3303 {
3304         uint32_t i;
3305         uint32_t rank_bgn, sr;
3306         uint32_t write_group, write_test_bgn;
3307         uint32_t read_group, read_test_bgn;
3308         uint32_t run_groups, current_run;
3309         uint32_t failing_groups = 0;
3310         uint32_t group_failed = 0;
3311         uint32_t sr_failed = 0;
3312
3313         debug("%s:%d\n", __func__, __LINE__);
3314         /* Initialize the data settings */
3315
3316         gbl->error_substage = CAL_SUBSTAGE_NIL;
3317         gbl->error_stage = CAL_STAGE_NIL;
3318         gbl->error_group = 0xff;
3319         gbl->fom_in = 0;
3320         gbl->fom_out = 0;
3321
3322         mem_config();
3323
3324         uint32_t bypass_mode = 0x1;
3325         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3326                 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3327                           SCC_MGR_GROUP_COUNTER_OFFSET);
3328                 scc_set_bypass_mode(i, bypass_mode);
3329         }
3330
3331         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3332                 /*
3333                  * Set VFIFO and LFIFO to instant-on settings in skip
3334                  * calibration mode.
3335                  */
3336                 mem_skip_calibrate();
3337         } else {
3338                 for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3339                         /*
3340                          * Zero all delay chain/phase settings for all
3341                          * groups and all shadow register sets.
3342                          */
3343                         scc_mgr_zero_all();
3344
3345                         run_groups = ~param->skip_groups;
3346
3347                         for (write_group = 0, write_test_bgn = 0; write_group
3348                                 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++,
3349                                 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) {
                                /* Initialize the group failure flag */
3351                                 group_failed = 0;
3352
3353                                 current_run = run_groups & ((1 <<
3354                                         RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3355                                 run_groups = run_groups >>
3356                                         RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3357
3358                                 if (current_run == 0)
3359                                         continue;
3360
3361                                 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3362                                                     SCC_MGR_GROUP_COUNTER_OFFSET);
3363                                 scc_mgr_zero_group(write_group, write_test_bgn,
3364                                                    0);
3365
3366                                 for (read_group = write_group *
3367                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
3368                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3369                                         read_test_bgn = 0;
3370                                         read_group < (write_group + 1) *
3371                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
3372                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
3373                                         group_failed == 0;
3374                                         read_group++, read_test_bgn +=
3375                                         RW_MGR_MEM_DQ_PER_READ_DQS) {
3376                                         /* Calibrate the VFIFO */
                                        if (!((STATIC_CALIB_STEPS) &
                                              CALIB_SKIP_VFIFO)) {
                                                if (!rw_mgr_mem_calibrate_vfifo(
                                                        read_group, read_test_bgn)) {
                                                        group_failed = 1;

                                                        if (!(gbl->phy_debug_mode_flags &
                                                              PHY_DEBUG_SWEEP_ALL_GROUPS))
                                                                return 0;
                                                }
                                        }
3391                                 }
3392
3393                                 /* Calibrate the output side */
3394                                 if (group_failed == 0)  {
3395                                         for (rank_bgn = 0, sr = 0; rank_bgn
3396                                                 < RW_MGR_MEM_NUMBER_OF_RANKS;
3397                                                 rank_bgn +=
3398                                                 NUM_RANKS_PER_SHADOW_REG,
3399                                                 ++sr) {
                                                sr_failed = 0;
                                                if (!((STATIC_CALIB_STEPS) &
                                                      CALIB_SKIP_WRITES)) {
                                                        if ((STATIC_CALIB_STEPS) &
                                                            CALIB_SKIP_DELAY_SWEEPS) {
                                                                /* not needed in quick mode! */
                                                        } else {
                                                                /*
                                                                 * Determine if this set of
                                                                 * ranks should be skipped
                                                                 * entirely.
                                                                 */
                                                                if (!param->skip_shadow_regs[sr] &&
                                                                    !rw_mgr_mem_calibrate_writes(
                                                                        rank_bgn, write_group,
                                                                        write_test_bgn)) {
                                                                        sr_failed = 1;
                                                                        if (!(gbl->phy_debug_mode_flags &
                                                                              PHY_DEBUG_SWEEP_ALL_GROUPS))
                                                                                return 0;
                                                                }
                                                        }
                                                }
                                                if (sr_failed != 0)
                                                        group_failed = 1;
3428                                         }
3429                                 }
3430
                                if (group_failed == 0) {
                                        for (read_group = write_group *
                                             RW_MGR_MEM_IF_READ_DQS_WIDTH /
                                             RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
                                             read_test_bgn = 0;
                                             read_group < (write_group + 1) *
                                             RW_MGR_MEM_IF_READ_DQS_WIDTH /
                                             RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
                                             group_failed == 0;
                                             read_group++, read_test_bgn +=
                                             RW_MGR_MEM_DQ_PER_READ_DQS) {
                                                if (!((STATIC_CALIB_STEPS) &
                                                      CALIB_SKIP_WRITES)) {
                                                        if (!rw_mgr_mem_calibrate_vfifo_end(
                                                                read_group, read_test_bgn)) {
                                                                group_failed = 1;

                                                                if (!(gbl->phy_debug_mode_flags &
                                                                      PHY_DEBUG_SWEEP_ALL_GROUPS))
                                                                        return 0;
                                                        }
                                                }
                                        }
                                }
3456
3457                                 if (group_failed != 0)
3458                                         failing_groups++;
3459                         }
3460
                        /*
                         * If there are any failing groups then report
                         * the failure.
                         */
3465                         if (failing_groups != 0)
3466                                 return 0;
3467
3468                         /* Calibrate the LFIFO */
3469                         if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) {
3470                                 /*
3471                                  * If we're skipping groups as part of debug,
3472                                  * don't calibrate LFIFO.
3473                                  */
3474                                 if (param->skip_groups == 0) {
3475                                         if (!rw_mgr_mem_calibrate_lfifo())
3476                                                 return 0;
3477                                 }
3478                         }
3479                 }
3480         }
3481
3482         /*
3483          * Do not remove this line as it makes sure all of our decisions
3484          * have been applied.
3485          */
3486         writel(0, &sdr_scc_mgr->update);
3487         return 1;
3488 }
3489
3490 static uint32_t run_mem_calibrate(void)
3491 {
3492         uint32_t pass;
3493         uint32_t debug_info;
3494
3495         debug("%s:%d\n", __func__, __LINE__);
3496
3497         /* Reset pass/fail status shown on afi_cal_success/fail */
3498         writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3499
        /* Stop the tracking manager */
3501         uint32_t ctrlcfg = readl(&sdr_ctrl->ctrl_cfg);
3502
3503         writel(ctrlcfg & 0xFFBFFFFF, &sdr_ctrl->ctrl_cfg);
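        /*
         * The 0xFFBFFFFF mask clears bit 22 (0x00400000) of ctrl_cfg,
         * which per the comment above stops the tracking manager; the
         * saved ctrlcfg value is written back after calibration.
         */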
3504
3505         initialize();
3506         rw_mgr_mem_initialize();
3507
3508         pass = mem_calibrate();
3509
3510         mem_precharge_and_activate();
3511         writel(0, &phy_mgr_cmd->fifo_reset);
3512
3513         /*
3514          * Handoff:
3515          * Don't return control of the PHY back to AFI when in debug mode.
3516          */
3517         if ((gbl->phy_debug_mode_flags & PHY_DEBUG_IN_DEBUG_MODE) == 0) {
3518                 rw_mgr_mem_handoff();
3519                 /*
3520                  * In Hard PHY this is a 2-bit control:
3521                  * 0: AFI Mux Select
3522                  * 1: DDIO Mux Select
3523                  */
3524                 writel(0x2, &phy_mgr_cfg->mux_sel);
3525         }
3526
3527         writel(ctrlcfg, &sdr_ctrl->ctrl_cfg);
3528
3529         if (pass) {
3530                 printf("%s: CALIBRATION PASSED\n", __FILE__);
3531
3532                 gbl->fom_in /= 2;
3533                 gbl->fom_out /= 2;
3534
3535                 if (gbl->fom_in > 0xff)
3536                         gbl->fom_in = 0xff;
3537
3538                 if (gbl->fom_out > 0xff)
3539                         gbl->fom_out = 0xff;
3540
3541                 /* Update the FOM in the register file */
3542                 debug_info = gbl->fom_in;
3543                 debug_info |= gbl->fom_out << 8;
3544                 writel(debug_info, &sdr_reg_file->fom);
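                /*
                 * fom_in occupies bits [7:0] and fom_out bits [15:8]; e.g.
                 * hypothetical figures of merit 0x20 in and 0x40 out pack
                 * into debug_info as 0x4020.
                 */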
3545
3546                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3547                 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3548         } else {
3549                 printf("%s: CALIBRATION FAILED\n", __FILE__);
3550
                /* Update the failing group/stage in the register file */
                debug_info = gbl->error_stage;
                debug_info |= gbl->error_substage << 8;
                debug_info |= gbl->error_group << 16;

                writel(debug_info, &sdr_reg_file->failing_stage);
                writel(debug_info, &phy_mgr_cfg->cal_debug_info);
                writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3564         }
3565
3566         return pass;
3567 }
3568
3569 /**
3570  * hc_initialize_rom_data() - Initialize ROM data
3571  *
3572  * Initialize ROM data.
3573  */
3574 static void hc_initialize_rom_data(void)
3575 {
3576         u32 i, addr;
3577
3578         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3579         for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++)
3580                 writel(inst_rom_init[i], addr + (i << 2));
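        /* Each ROM entry is one 32-bit word, hence the 4-byte (i << 2) stride. */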
3581
3582         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3583         for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++)
3584                 writel(ac_rom_init[i], addr + (i << 2));
3585 }
3586
3587 /**
3588  * initialize_reg_file() - Initialize SDR register file
3589  *
3590  * Initialize SDR register file.
3591  */
3592 static void initialize_reg_file(void)
3593 {
3594         /* Initialize the register file with the correct data */
3595         writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature);
3596         writel(0, &sdr_reg_file->debug_data_addr);
3597         writel(0, &sdr_reg_file->cur_stage);
3598         writel(0, &sdr_reg_file->fom);
3599         writel(0, &sdr_reg_file->failing_stage);
3600         writel(0, &sdr_reg_file->debug1);
3601         writel(0, &sdr_reg_file->debug2);
3602 }
3603
3604 /**
3605  * initialize_hps_phy() - Initialize HPS PHY
3606  *
3607  * Initialize HPS PHY.
3608  */
3609 static void initialize_hps_phy(void)
3610 {
3611         uint32_t reg;
3612         /*
3613          * Tracking also gets configured here because it's in the
3614          * same register.
3615          */
        uint32_t trk_sample_count = 7500;
        /*
         * Format is number of outer loops in the 16 MSB, sample
         * count in 16 LSB.
         */
        uint32_t trk_long_idle_sample_count = (10 << 16) | 100;
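        /* Here, (10 << 16) | 100 == 0x000a0064: 10 outer loops of 100 samples. */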
3622
3623         reg = 0;
3624         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3625         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3626         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3627         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3628         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3629         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3630         /*
3631          * This field selects the intrinsic latency to RDATA_EN/FULL path.
3632          * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3633          */
3634         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3635         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3636                 trk_sample_count);
3637         writel(reg, &sdr_ctrl->phy_ctrl0);
3638
3639         reg = 0;
3640         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3641                 trk_sample_count >>
3642                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3643         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3644                 trk_long_idle_sample_count);
3645         writel(reg, &sdr_ctrl->phy_ctrl1);
3646
3647         reg = 0;
3648         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3649                 trk_long_idle_sample_count >>
3650                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3651         writel(reg, &sdr_ctrl->phy_ctrl2);
3652 }
3653
3654 static void initialize_tracking(void)
3655 {
3656         uint32_t concatenated_longidle = 0x0;
3657         uint32_t concatenated_delays = 0x0;
3658         uint32_t concatenated_rw_addr = 0x0;
3659         uint32_t concatenated_refresh = 0x0;
3660         uint32_t trk_sample_count = 7500;
3661         uint32_t dtaps_per_ptap;
3662         uint32_t tmp_delay;
3663
3664         /*
3665          * compute usable version of value in case we skip full
3666          * computation later
3667          */
3668         dtaps_per_ptap = 0;
3669         tmp_delay = 0;
3670         while (tmp_delay < IO_DELAY_PER_OPA_TAP) {
3671                 dtaps_per_ptap++;
3672                 tmp_delay += IO_DELAY_PER_DCHAIN_TAP;
3673         }
3674         dtaps_per_ptap--;
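        /*
         * The loop above effectively computes
         * ceil(IO_DELAY_PER_OPA_TAP / IO_DELAY_PER_DCHAIN_TAP) - 1.
         * With hypothetical taps of 2500 ps per ptap and 78 ps per dtap,
         * the smallest count reaching 2500 is 33, so dtaps_per_ptap == 32.
         */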
3675
        concatenated_longidle = concatenated_longidle ^ 10;    /* longidle outer loop */
        concatenated_longidle = concatenated_longidle << 16;
        concatenated_longidle = concatenated_longidle ^ 100;   /* longidle sample count */

        concatenated_delays = concatenated_delays ^ 243;       /* trfc, worst case of 933 MHz 4Gb */
        concatenated_delays = concatenated_delays << 8;
        concatenated_delays = concatenated_delays ^ 14;        /* trcd, worst case */
        concatenated_delays = concatenated_delays << 8;
        concatenated_delays = concatenated_delays ^ 10;        /* vfifo wait */
        concatenated_delays = concatenated_delays << 8;
        concatenated_delays = concatenated_delays ^ 4;         /* mux delay */
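        /*
         * Since every XOR above operates on zeroed bits, this is plain
         * field packing:
         *   concatenated_longidle = (10 << 16) | 100
         *   concatenated_delays   = (243 << 24) | (14 << 16) | (10 << 8) | 4
         */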
3692
3693         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_IDLE;
3694         concatenated_rw_addr = concatenated_rw_addr << 8;
3695         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_ACTIVATE_1;
3696         concatenated_rw_addr = concatenated_rw_addr << 8;
3697         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_SGLE_READ;
3698         concatenated_rw_addr = concatenated_rw_addr << 8;
3699         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_PRECHARGE_ALL;
3700
3701         concatenated_refresh = concatenated_refresh ^ RW_MGR_REFRESH_ALL;
3702         concatenated_refresh = concatenated_refresh << 24;
3703         concatenated_refresh = concatenated_refresh ^ 1000; /* trefi */
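        /*
         * Same packing idea: concatenated_rw_addr holds four RW_MGR
         * micro-instruction addresses as (RW_MGR_IDLE << 24) |
         * (RW_MGR_ACTIVATE_1 << 16) | (RW_MGR_SGLE_READ << 8) |
         * RW_MGR_PRECHARGE_ALL, and concatenated_refresh holds
         * (RW_MGR_REFRESH_ALL << 24) | 1000.  This relies on each
         * instruction address fitting in eight bits.
         */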
3704
3705         /* Initialize the register file with the correct data */
3706         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
3707         writel(trk_sample_count, &sdr_reg_file->trk_sample_count);
3708         writel(concatenated_longidle, &sdr_reg_file->trk_longidle);
3709         writel(concatenated_delays, &sdr_reg_file->delays);
3710         writel(concatenated_rw_addr, &sdr_reg_file->trk_rw_mgr_addr);
3711         writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, &sdr_reg_file->trk_read_dqs_width);
3712         writel(concatenated_refresh, &sdr_reg_file->trk_rfsh);
3713 }
3714
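/*
 * sdram_calibration_full() - Execute the full SDRAM calibration sequence.
 *
 * Points the global param/gbl pointers at on-stack storage, programs the
 * register file, PHY CSRs, SCC manager and tracking parameters, enables
 * every rank, shadow register and group, and then hands control to
 * run_mem_calibrate().  The result of run_mem_calibrate() is returned
 * unchanged to the caller.
 */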
3715 int sdram_calibration_full(void)
3716 {
3717         struct param_type my_param;
3718         struct gbl_type my_gbl;
3719         uint32_t pass;
3720         uint32_t i;
3721
3722         param = &my_param;
3723         gbl = &my_gbl;
3724
3725         /* Initialize the debug mode flags */
3726         gbl->phy_debug_mode_flags = 0;
3727         /* Enable the calibration report by default */
3728         gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3729         /*
3730          * By default, sweep all groups (regardless of fail state) and
3731          * leave the guaranteed read test enabled.
3732          */
3733 #if DISABLE_GUARANTEED_READ
3734         gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3735 #endif
3736         /* Initialize the register file */
3737         initialize_reg_file();
3738
3739         /* Initialize any PHY CSR */
3740         initialize_hps_phy();
3741
3742         scc_mgr_initialize();
3743
3744         initialize_tracking();
3745
3746         /* Enable all ranks, shadow registers and groups */
3747         for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++)
3748                 param->skip_ranks[i] = 0;
3749         for (i = 0; i < NUM_SHADOW_REGS; ++i)
3750                 param->skip_shadow_regs[i] = 0;
3751         param->skip_groups = 0;
3752
3753         printf("%s: Preparing to start memory calibration\n", __FILE__);
3754
3755         debug("%s:%d\n", __func__, __LINE__);
3756         debug_cond(DLEVEL == 1,
3757                    "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3758                    RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM,
3759                    RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS,
3760                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS,
3761                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
3762         debug_cond(DLEVEL == 1,
3763                    "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3764                    RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3765                    RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH,
3766                    IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP);
3767         debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u",
3768                    IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH);
3769         debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3770                    IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX,
3771                    IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX);
3772         debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3773                    IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX,
3774                    IO_IO_OUT2_DELAY_MAX);
3775         debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3776                    IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE);
3777
3778         hc_initialize_rom_data();
3779
3780         /* Update stage/group info for simulation */
3781         reg_file_set_stage(CAL_STAGE_NIL);
3782         reg_file_set_group(0);
3783
3784         /*
3785          * Load the global needed for actions that require dynamic
3786          * calibration support.
3787          */
3788         dyn_calib_steps = STATIC_CALIB_STEPS;
3789         /*
3790          * Load global to allow dynamic selection of delay loop settings
3791          * based on calibration mode.
3792          */
3793         if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3794                 skip_delay_mask = 0xff;
3795         else
3796                 skip_delay_mask = 0x0;
3797
3798         pass = run_mem_calibrate();
3799
3800         printf("%s: Calibration complete\n", __FILE__);
3801         return pass;
3802 }