/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier:    BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include "sequencer.h"
#include "sequencer_auto.h"
#include "sequencer_auto_ac_init.h"
#include "sequencer_auto_inst_init.h"
#include "sequencer_defines.h"

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
        (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);

static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
        (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);

static struct socfpga_sdr_reg_file *sdr_reg_file =
        (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;

static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
        (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);

static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
        (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;

static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
        (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);

static struct socfpga_data_mgr *data_mgr =
        (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;

static struct socfpga_sdr_ctrl *sdr_ctrl =
        (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D         1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS for the
 * check, which is based on the rtl-supplied value, or we dynamically compute
 * the value to use based on the dynamically-chosen calibration mode.
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
        STATIC_SKIP_DELAY_LOOPS)

/* calibration steps requested by the rtl */
uint16_t dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
 * instead of static, we use boolean logic to select between
 * non-skip and skip values
 *
 * The mask is set to include all bits when not-skipping, but is
 * zero when skipping
 */

uint16_t skip_delay_mask;       /* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
        ((non_skip_value) & skip_delay_mask)
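
/*
 * Illustration (added for clarity, not from the original source): with
 * skipping disabled, skip_delay_mask is all-ones, so e.g.
 * SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x6a) evaluates to 0x6a; with skipping
 * enabled the mask is 0, so the same expression evaluates to 0 and the
 * delay loop is skipped.
 */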

struct gbl_type *gbl;
struct param_type *param;
uint32_t curr_shadow_reg;

static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
        uint32_t write_group, uint32_t use_dm,
        uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);

static void set_failing_group_stage(uint32_t group, uint32_t stage,
        uint32_t substage)
{
        /*
         * Only set the global stage if there has not been any other
         * failing group
         */
        if (gbl->error_stage == CAL_STAGE_NIL) {
                gbl->error_substage = substage;
                gbl->error_stage = stage;
                gbl->error_group = group;
        }
}

static void reg_file_set_group(u16 set_group)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
        set_sub_stage &= 0xff;
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

static void initialize(void)
{
        debug("%s:%d\n", __func__, __LINE__);
        /* USER calibration has control over path to memory */
        /*
         * In Hard PHY this is a 2-bit control:
         * 0: AFI Mux Select
         * 1: DDIO Mux Select
         */
        writel(0x3, &phy_mgr_cfg->mux_sel);

        /* USER memory clock is not stable, we begin initialization */
        writel(0, &phy_mgr_cfg->reset_mem_stbl);

        /* USER calibration status all set to zero */
        writel(0, &phy_mgr_cfg->cal_status);

        writel(0, &phy_mgr_cfg->cal_debug_info);

        if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) {
                param->read_correct_mask_vg  = ((uint32_t)1 <<
                        (RW_MGR_MEM_DQ_PER_READ_DQS /
                        RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
                param->write_correct_mask_vg = ((uint32_t)1 <<
                        (RW_MGR_MEM_DQ_PER_READ_DQS /
                        RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
                param->read_correct_mask     = ((uint32_t)1 <<
                        RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
                param->write_correct_mask    = ((uint32_t)1 <<
                        RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
                param->dm_correct_mask       = ((uint32_t)1 <<
                        (RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH))
                        - 1;
        }
}

static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode)
{
        uint32_t odt_mask_0 = 0;
        uint32_t odt_mask_1 = 0;
        uint32_t cs_and_odt_mask;

        if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) {
                if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) {
                        /*
                         * 1 Rank
                         * Read: ODT = 0
                         * Write: ODT = 1
                         */
                        odt_mask_0 = 0x0;
                        odt_mask_1 = 0x1;
                } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) {
                        /* 2 Ranks */
                        if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
                                /*
                                 * - Dual-Slot, Single-Rank
                                 *   (1 chip-select per DIMM)
                                 * OR
                                 * - RDIMM, 4 total CS (2 CS per DIMM)
                                 *   means 2 DIMMs
                                 *
                                 * Since MEM_NUMBER_OF_RANKS is 2, they are
                                 * both single rank with 2 CS each
                                 * (special for RDIMM).
                                 *
                                 * Read: Turn on ODT on the opposite rank
                                 * Write: Turn on ODT on all ranks
                                 */
                                odt_mask_0 = 0x3 & ~(1 << rank);
                                odt_mask_1 = 0x3;
                        } else {
                                /*
                                 * USER - Single-Slot, Dual-Rank DIMMs
                                 * (2 chip-selects per DIMM)
                                 * USER Read: Turn off ODT on all ranks
                                 * USER Write: Turn on ODT on active rank
                                 */
                                odt_mask_0 = 0x0;
                                odt_mask_1 = 0x3 & (1 << rank);
                        }
                } else {
                        /*
                         * 4 Ranks
                         * Read:
                         * ----------+-----------------------+
                         *           |                       |
                         *           |         ODT           |
                         * Read From +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  0  |
                         *     1     |  1  |  0  |  0  |  0  |
                         *     2     |  0  |  0  |  0  |  1  |
                         *     3     |  0  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *
                         * Write:
                         * ----------+-----------------------+
                         *           |                       |
                         *           |         ODT           |
                         * Write To  +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  1  |
                         *     1     |  1  |  0  |  1  |  0  |
                         *     2     |  0  |  1  |  0  |  1  |
                         *     3     |  1  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         */
                        switch (rank) {
                        case 0:
                                odt_mask_0 = 0x4;
                                odt_mask_1 = 0x5;
                                break;
                        case 1:
                                odt_mask_0 = 0x8;
                                odt_mask_1 = 0xA;
                                break;
                        case 2:
                                odt_mask_0 = 0x1;
                                odt_mask_1 = 0x5;
                                break;
                        case 3:
                                odt_mask_0 = 0x2;
                                odt_mask_1 = 0xA;
                                break;
                        }
                }
        } else {
                odt_mask_0 = 0x0;
                odt_mask_1 = 0x0;
        }

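        /*
         * Pack the CS and ODT settings into the RW manager register:
         * bits [7:0] carry the (active-low) chip-selects, bits [15:8] the
         * read ODT mask and bits [23:16] the write ODT mask. Worked example
         * (illustration only): rank 1 of 4 gives
         * 0xFD | (0x8 << 8) | (0xA << 16) == 0xA08FD.
         */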
        cs_and_odt_mask =
                (0xFF & ~(1 << rank)) |
                ((0xFF & odt_mask_0) << 8) |
                ((0xFF & odt_mask_1) << 16);
        writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}

/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
        writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}
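
/*
 * Note (added for clarity): each read/write group owns one 32-bit word at
 * the given offset, so the (grp << 2) term above is simply a byte offset of
 * 4 per group.
 */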

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
        /*
         * Clear register file for HPS. 16 (2^4) is the size of the
         * full register file in the scc mgr:
         *      RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
         *                             MEM_IF_READ_DQS_WIDTH - 1);
         */
        int i;

        for (i = 0; i < 16; i++) {
                debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
                           __func__, __LINE__, i);
                scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
        }
}

static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase)
{
        scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
{
        scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(uint32_t write_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
}

static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(uint32_t write_group,
                                       uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
}

static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
                    RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm,
                    delay);
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(uint32_t dqs)
{
        writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
        writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(uint32_t dq_in_group)
{
        writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(uint32_t dm)
{
        writel(dm, &sdr_scc_mgr->dm_ena);
}

/**
 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 * @update:     If non-zero, trigger SCC Manager update for all ranks
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register
 * and optionally triggers the SCC update for all ranks.
 */
static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
                                  const int update)
{
        u32 r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_set(off, grp, val);

                if (update || (r == 0)) {
                        writel(grp, &sdr_scc_mgr->dqs_ena);
                        writel(0, &sdr_scc_mgr->update);
                }
        }
}

static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
{
        /*
         * USER although the h/w doesn't support different phases per
         * shadow register, for simplicity our scc manager modeling
         * keeps different phase settings per shadow reg, and it's
         * important for us to keep them in sync to match h/w.
         * for efficiency, the scan chain update should occur only
         * once to sr0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
                              read_group, phase, 0);
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
                                                     uint32_t phase)
{
        /*
         * USER although the h/w doesn't support different phases per
         * shadow register, for simplicity our scc manager modeling
         * keeps different phase settings per shadow reg, and it's
         * important for us to keep them in sync to match h/w.
         * for efficiency, the scan chain update should occur only
         * once to sr0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
                              write_group, phase, 0);
}

static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
                                               uint32_t delay)
{
        /*
         * In shadow register mode, the T11 settings are stored in
         * registers in the core, which are updated by the DQS_ENA
         * signals. Not issuing the SCC_MGR_UPD command allows us to
         * save lots of rank switching overhead, by calling
         * select_shadow_regs_for_update with update_scan_chains
         * set to 0.
         */
        scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
                              read_group, delay, 1);
        writel(0, &sdr_scc_mgr->update);
}

static void scc_mgr_set_oct_out1_delay(uint32_t write_group, uint32_t delay)
{
        uint32_t read_group;
        uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_OCT_OUT1_DELAY_OFFSET;

        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
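        /*
         * For example (illustration only): if there are twice as many read
         * groups as write groups, write group 1 maps to read groups 2 and 3,
         * so the loop below writes the delay to both.
         */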
        for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH /
             RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
             read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH /
             RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group)
                writel(delay, addr + (read_group << 2));
}

static void scc_mgr_set_hhp_extras(void)
{
        /*
         * Load the fixed setting in the SCC manager
         * bits: 0:0 = 1'b1     - dqs bypass
         * bits: 1:1 = 1'b1     - dq bypass
         * bits: 4:2 = 3'b001   - rfifo_mode
         * bits: 6:5 = 2'b01    - rfifo clock_select
         * bits: 7:7 = 1'b0     - separate gating from ungating setting
         * bits: 8:8 = 1'b0     - separate OE from Output delay setting
         */
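        /* The fields above OR together to 0x27, as computed below. */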
        uint32_t value = (0<<8) | (0<<7) | (1<<5) | (1<<2) | (1<<1) | (1<<0);
        uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_HHP_GLOBALS_OFFSET;

        writel(value, addr + SCC_MGR_HHP_EXTRAS_OFFSET);
}

/*
 * USER Zero all DQS config
 * TODO: maybe rename to scc_mgr_zero_dqs_config (or something)
 */
static void scc_mgr_zero_all(void)
{
        uint32_t i, r;

        /*
         * USER Zero all DQS config settings, across all groups and all
         * shadow registers
         */
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
                        /*
                         * The phases actually don't exist on a per-rank basis,
                         * but there's no harm updating them several times, so
                         * let's keep the code simple.
                         */
                        scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
                        scc_mgr_set_dqs_en_phase(i, 0);
                        scc_mgr_set_dqs_en_delay(i, 0);
                }

                for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
                        scc_mgr_set_dqdqs_output_phase(i, 0);
                        /* av/cv don't have out2 */
                        scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
                }
        }

        /* multicast to all DQS group enables */
        writel(0xff, &sdr_scc_mgr->dqs_ena);
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
 * @write_group:        Write group
 *
 * Set bypass mode and trigger SCC update.
 */
static void scc_set_bypass_mode(const u32 write_group)
{
        /* Only needed once to set all groups, pins, DQ, DQS, DM. */
        if (write_group == 0) {
                debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", __func__,
                           __LINE__);
                scc_mgr_set_hhp_extras();
                debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
                           __func__, __LINE__);
        }

        /* Multicast to all DQ enables. */
        writel(0xff, &sdr_scc_mgr->dq_ena);
        writel(0xff, &sdr_scc_mgr->dm_ena);

        /* Update current DQS IO enable. */
        writel(0, &sdr_scc_mgr->dqs_io_ena);

        /* Update the DQS logic. */
        writel(write_group, &sdr_scc_mgr->dqs_ena);

        /* Hit update. */
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
 * @write_group:        Write group
 *
 * Load DQS settings for Write Group, do not trigger SCC update.
 */
static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
{
        const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
                          RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
        for (i = 0; i < ratio; i++)
                writel(base + i, &sdr_scc_mgr->dqs_ena);
}

static void scc_mgr_zero_group(uint32_t write_group, uint32_t test_begin,
                               int32_t out_only)
{
        uint32_t i, r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
                /* Zero all DQ config settings */
                for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
                        scc_mgr_set_dq_out1_delay(i, 0);
                        if (!out_only)
                                scc_mgr_set_dq_in_delay(i, 0);
                }

                /* multicast to all DQ enables */
                writel(0xff, &sdr_scc_mgr->dq_ena);

                /* Zero all DM config settings */
                for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                        scc_mgr_set_dm_out1_delay(i, 0);

                /* multicast to all DM enables */
                writel(0xff, &sdr_scc_mgr->dm_ena);

                /* zero all DQS io settings */
                if (!out_only)
                        scc_mgr_set_dqs_io_in_delay(write_group, 0);
                /* av/cv don't have out2 */
                scc_mgr_set_dqs_out1_delay(write_group, IO_DQS_OUT_RESERVE);
                scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
                scc_mgr_load_dqs_for_write_group(write_group);

                /* multicast to all DQS IO enables (only 1) */
                writel(0, &sdr_scc_mgr->dqs_io_ena);

                /* hit update to zero everything */
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(uint32_t write_group,
                                            uint32_t group_bgn, uint32_t delay)
{
        uint32_t i, p;

        for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
                scc_mgr_set_dq_in_delay(p, delay);
                scc_mgr_load_dq(p);
        }
}

/* apply and load a particular output delay for the DQ pins in a group */
static void scc_mgr_apply_group_dq_out1_delay(uint32_t write_group,
                                              uint32_t group_bgn,
                                              uint32_t delay1)
{
        uint32_t i, p;

        for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
                scc_mgr_set_dq_out1_delay(i, delay1);
                scc_mgr_load_dq(i);
        }
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(uint32_t write_group,
                                              uint32_t delay1)
{
        uint32_t i;

        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                scc_mgr_set_dm_out1_delay(i, delay1);
                scc_mgr_load_dm(i);
        }
}

/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
                                                    uint32_t delay)
{
        scc_mgr_set_dqs_out1_delay(write_group, delay);
        scc_mgr_load_dqs_io();

        scc_mgr_set_oct_out1_delay(write_group, delay);
        scc_mgr_load_dqs_for_write_group(write_group);
}

/* apply a delay to the entire output side: DQ, DM, DQS, OCT */
static void scc_mgr_apply_group_all_out_delay_add(uint32_t write_group,
                                                  uint32_t group_bgn,
                                                  uint32_t delay)
{
        uint32_t i, p, new_delay;

        /* dq shift */
        for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
                new_delay = READ_SCC_DQ_OUT2_DELAY;
                new_delay += delay;

                if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                        debug_cond(DLEVEL == 1,
                                   "%s:%d (%u, %u, %u) DQ[%u,%u]: %u > %lu => %lu",
                                   __func__, __LINE__,
                                   write_group, group_bgn, delay, i, p, new_delay,
                                   (long unsigned int)IO_IO_OUT2_DELAY_MAX,
                                   (long unsigned int)IO_IO_OUT2_DELAY_MAX);
                        new_delay = IO_IO_OUT2_DELAY_MAX;
                }

                scc_mgr_load_dq(i);
        }

        /* dm shift */
        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                new_delay = READ_SCC_DM_IO_OUT2_DELAY;
                new_delay += delay;

                if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                        debug_cond(DLEVEL == 1,
                                   "%s:%d (%u, %u, %u) DM[%u]: %u > %lu => %lu\n",
                                   __func__, __LINE__,
                                   write_group, group_bgn, delay, i, new_delay,
                                   (long unsigned int)IO_IO_OUT2_DELAY_MAX,
                                   (long unsigned int)IO_IO_OUT2_DELAY_MAX);
                        new_delay = IO_IO_OUT2_DELAY_MAX;
                }

                scc_mgr_load_dm(i);
        }

        /* dqs shift */
        new_delay = READ_SCC_DQS_IO_OUT2_DELAY;
        new_delay += delay;

        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                debug_cond(DLEVEL == 1,
                           "%s:%d (%u, %u, %u) DQS: %u > %d => %d; adding %u to OUT1\n",
                           __func__, __LINE__,
                           write_group, group_bgn, delay, new_delay,
                           IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
                scc_mgr_set_dqs_out1_delay(write_group, new_delay -
                                           IO_IO_OUT2_DELAY_MAX);
                new_delay = IO_IO_OUT2_DELAY_MAX;
        }

        scc_mgr_load_dqs_io();

        /* oct shift */
        new_delay = READ_SCC_OCT_OUT2_DELAY;
        new_delay += delay;

        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
                debug_cond(DLEVEL == 1,
                           "%s:%d (%u, %u, %u) OCT: %u > %d => %d; adding %u to OUT1\n",
                           __func__, __LINE__,
                           write_group, group_bgn, delay, new_delay,
                           IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
                scc_mgr_set_oct_out1_delay(write_group, new_delay -
                                           IO_IO_OUT2_DELAY_MAX);
                new_delay = IO_IO_OUT2_DELAY_MAX;
        }

        scc_mgr_load_dqs_for_write_group(write_group);
}

/*
 * USER apply a delay to the entire output side (DQ, DM, DQS, OCT)
 * and to all ranks
 */
static void scc_mgr_apply_group_all_out_delay_add_all_ranks(
        uint32_t write_group, uint32_t group_bgn, uint32_t delay)
{
        uint32_t r;

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
                r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_apply_group_all_out_delay_add(write_group,
                                                      group_bgn, delay);
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * Optimization used to recover some slots in ddr3 inst_rom; could be
 * applied to other protocols if we wanted to.
 */
static void set_jump_as_return(void)
{
        /*
         * To save space, we replace return with a jump to a special shared
         * RETURN instruction, and set the counter to a large value so that
         * we always jump.
         */
        writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
        writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/*
 * should always use constants as argument to ensure all computations are
 * performed at compile time
 */
static void delay_for_n_mem_clocks(const uint32_t clocks)
{
        uint32_t afi_clocks;
        uint8_t inner = 0;
        uint8_t outer = 0;
        uint16_t c_loop = 0;

        debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

        /* scale (rounding up) to get afi clocks */
        afi_clocks = (clocks + AFI_RATE_RATIO - 1) / AFI_RATE_RATIO;

        /*
         * Note, we don't bother accounting for being off a little bit
         * because of a few extra instructions in outer loops
         * Note, the loops have a test at the end, and do the test before
         * the decrement, and so always perform the loop
         * 1 time more than the counter value
         */
        if (afi_clocks == 0) {
                ;
        } else if (afi_clocks <= 0x100) {
                inner = afi_clocks - 1;
                outer = 0;
                c_loop = 0;
        } else if (afi_clocks <= 0x10000) {
                inner = 0xff;
                outer = (afi_clocks - 1) >> 8;
                c_loop = 0;
        } else {
                inner = 0xff;
                outer = 0xff;
                c_loop = (afi_clocks - 1) >> 16;
        }
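
        /*
         * Worked example (illustration only, assuming AFI_RATE_RATIO == 2):
         * delay_for_n_mem_clocks(512) gives afi_clocks == 256, which falls
         * in the second case above: inner == 0xff, outer == 0, c_loop == 0,
         * i.e. a single loop of 256 iterations.
         */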

        /*
         * rom instructions are structured as follows:
         *
         *    IDLE_LOOP2: jnz cntr0, TARGET_A
         *    IDLE_LOOP1: jnz cntr1, TARGET_B
         *                return
         *
         * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
         * TARGET_B is set to IDLE_LOOP2 as well
         *
         * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
         * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
         *
         * a little confusing, but it helps save precious space in the inst_rom
         * and sequencer rom and keeps the delays more accurate and reduces
         * overhead
         */
        if (afi_clocks <= 0x100) {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                        &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_IDLE_LOOP1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                          RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        } else {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                        &sdr_rw_load_mgr_regs->load_cntr0);

                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
                        &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_IDLE_LOOP2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(RW_MGR_IDLE_LOOP2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                /* hack to get around compiler not being smart enough */
                if (afi_clocks <= 0x10000) {
                        /* only need to run once */
                        writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                                  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                } else {
                        do {
                                writel(RW_MGR_IDLE_LOOP2,
                                        SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                        RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                        } while (c_loop-- != 0);
                }
        }
        debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}

static void rw_mgr_mem_initialize(void)
{
        uint32_t r;
        uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                           RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        debug("%s:%d\n", __func__, __LINE__);

        /* The reset / cke part of initialization is broadcasted to all ranks */
        writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);

        /*
         * Here's how you load the registers for a loop:
         * Counters are located @ 0x800
         * Jump addresses are located @ 0xC00
         * For both, registers 0 to 3 are selected using bits 3 and 2, like
         * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
         * I know this ain't pretty, but Avalon bus throws away the 2 least
         * significant bits
         */

        /* start with memory RESET activated */

        /* tINIT = 200us */

        /*
         * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
         * If a and b are the numbers of iterations of the 2 nested loops,
         * it takes the following number of cycles to complete the operation:
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
         * b = 6A
         */
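        /* Check: ((2 + 0) * 256 + 2) * 106 = 54484 cycles, just over 200us. */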

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump address */
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(RW_MGR_INIT_RESET_0_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        /* Execute count instruction */
        writel(RW_MGR_INIT_RESET_0_CKE_0, grpaddr);

        /* indicate that memory is stable */
        writel(1, &phy_mgr_cfg->reset_mem_stbl);

        /*
         * transition the RESET to high
         * Wait for 500us
         */

        /*
         * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
         * If a and b are the numbers of iterations of the 2 nested loops,
         * it takes the following number of cycles to complete the operation:
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
         * b = FF
         */
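        /* Check: ((2 + 2) * 131 + 2) * 256 = 134656 cycles, just over 500us. */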

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump address */
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(RW_MGR_INIT_RESET_1_CKE_0,
                &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        writel(RW_MGR_INIT_RESET_1_CKE_0, grpaddr);

        /* bring up clock enable */

        /* tXRP < 250 ck cycles */
        delay_for_n_mem_clocks(250);

        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
                if (param->skip_ranks[r]) {
                        /* request to skip the rank */
                        continue;
                }

                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

                /*
                 * USER Use mirrored commands for odd ranks if address
                 * mirroring is on
                 */
                if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_DLL_RESET_MIRR, grpaddr);
                } else {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1, grpaddr);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_DLL_RESET, grpaddr);
                }
                set_jump_as_return();
                writel(RW_MGR_ZQCL, grpaddr);

                /* tZQinit = tDLLK = 512 ck cycles */
                delay_for_n_mem_clocks(512);
        }
}

/*
 * At the end of calibration we have to program the user settings in
 * and hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(void)
{
        uint32_t r;
        uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                           RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        debug("%s:%d\n", __func__, __LINE__);
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
                if (param->skip_ranks[r])
                        /* request to skip the rank */
                        continue;
                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

                /* precharge all banks ... */
                writel(RW_MGR_PRECHARGE_ALL, grpaddr);

                /* load up MR settings specified by user */

                /*
                 * Use mirrored commands for odd ranks if address
                 * mirroring is on
                 */
                if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1_MIRR, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_USER_MIRR, grpaddr);
                } else {
                        set_jump_as_return();
                        writel(RW_MGR_MRS2, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS3, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS1, grpaddr);
                        delay_for_n_mem_clocks(4);
                        set_jump_as_return();
                        writel(RW_MGR_MRS0_USER, grpaddr);
                }
                /*
                 * USER need to wait tMOD (12CK or 15ns) time before issuing
                 * other commands, but we will have plenty of NIOS cycles before
                 * actual handoff, so it's okay.
                 */
        }
}

/*
 * performs a guaranteed read on the patterns we are going to use during a
 * read test to ensure memory works
 */
static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn,
        uint32_t group, uint32_t num_tries, uint32_t *bit_chk,
        uint32_t all_ranks)
{
        uint32_t r, vg;
        uint32_t correct_mask_vg;
        uint32_t tmp_bit_chk;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        uint32_t addr;
        uint32_t base_rw_mgr;

        *bit_chk = param->read_correct_mask;
        correct_mask_vg = param->read_correct_mask_vg;

        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* request to skip the rank */
                        continue;

                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up a constant burst of read commands */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
                writel(RW_MGR_GUARANTEED_READ,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
                writel(RW_MGR_GUARANTEED_READ_CONT,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                tmp_bit_chk = 0;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
                        /* reset the fifos to get pointers to known state */
                        writel(0, &phy_mgr_cmd->fifo_reset);
                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);

                        tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
                                / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

                        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
                        writel(RW_MGR_GUARANTEED_READ, addr +
                               ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
                                vg) << 2));

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr));

                        if (vg == 0)
                                break;
                }
                *bit_chk &= tmp_bit_chk;
        }

        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
        debug_cond(DLEVEL == 1,
                   "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %lu\n",
                   __func__, __LINE__, group, *bit_chk, param->read_correct_mask,
                   (long unsigned int)(*bit_chk == param->read_correct_mask));
        return *bit_chk == param->read_correct_mask;
}

static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks
        (uint32_t group, uint32_t num_tries, uint32_t *bit_chk)
{
        return rw_mgr_mem_calibrate_read_test_patterns(0, group,
                num_tries, bit_chk, 1);
}

/* load up the patterns we are going to use during a read test */
static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn,
        uint32_t all_ranks)
{
        uint32_t r;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);

        debug("%s:%d\n", __func__, __LINE__);
        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* request to skip the rank */
                        continue;

                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                /* Load up a constant burst */
                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT0,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);

                writel(RW_MGR_GUARANTEED_WRITE_WAIT3,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);

                writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                                RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        }

        set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
}

/*
 * Try a read and see if it returns correct data back. Has dummy reads
 * inserted into the mix, used to align DQS enable. Has more thorough
 * checks than the regular read test.
 */
static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group,
        uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
        uint32_t all_groups, uint32_t all_ranks)
{
        uint32_t r, vg;
        uint32_t correct_mask_vg;
        uint32_t tmp_bit_chk;
        uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        uint32_t addr;
        uint32_t base_rw_mgr;

        *bit_chk = param->read_correct_mask;
        correct_mask_vg = param->read_correct_mask_vg;

        uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) &
                CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION);

        for (r = rank_bgn; r < rank_end; r++) {
                if (param->skip_ranks[r])
                        /* request to skip the rank */
                        continue;

                /* set rank */
                set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

                writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);

                writel(RW_MGR_READ_B2B_WAIT1,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
                writel(RW_MGR_READ_B2B_WAIT2,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                if (quick_read_mode)
                        writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
                        /* need at least two (1+1) reads to capture failures */
                else if (all_groups)
                        writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
                else
                        writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);

                writel(RW_MGR_READ_B2B,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
                if (all_groups)
                        writel(RW_MGR_MEM_IF_READ_DQS_WIDTH *
                               RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1,
                               &sdr_rw_load_mgr_regs->load_cntr3);
                else
                        writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);

                writel(RW_MGR_READ_B2B,
                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);

                tmp_bit_chk = 0;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
                        /* reset the fifos to get pointers to known state */
                        writel(0, &phy_mgr_cmd->fifo_reset);
                        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                  RW_MGR_RESET_READ_DATAPATH_OFFSET);

                        tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
                                / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

                        if (all_groups)
                                addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET;
                        else
                                addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;

                        writel(RW_MGR_READ_B2B, addr +
                               ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
                               vg) << 2));

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));

                        if (vg == 0)
                                break;
                }
                *bit_chk &= tmp_bit_chk;
        }

        addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

        if (all_correct) {
                set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %lu",
                           __func__, __LINE__, group,
                           all_groups, *bit_chk, param->read_correct_mask,
                           (long unsigned int)(*bit_chk ==
                           param->read_correct_mask));
                return *bit_chk == param->read_correct_mask;
        } else {
                set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "%s:%d read_test(%u,ONE,%u) => (%u != %lu) => %lu\n",
                           __func__, __LINE__,
                           group, all_groups, *bit_chk, (long unsigned int)0,
                           (long unsigned int)(*bit_chk != 0x00));
                return *bit_chk != 0x00;
        }
}

static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group,
        uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
        uint32_t all_groups)
{
        return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct,
                                              bit_chk, all_groups, 1);
}

static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v)
{
        writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
        (*v)++;
}

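/*
 * Note (added for clarity): the VFIFO pointer wraps modulo VFIFO_SIZE, so
 * decrementing it by one is implemented below as VFIFO_SIZE - 1 increments.
 */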
1301 static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v)
1302 {
1303         uint32_t i;
1304
1305         for (i = 0; i < VFIFO_SIZE-1; i++)
1306                 rw_mgr_incr_vfifo(grp, v);
1307 }
1308
1309 static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk)
1310 {
1311         uint32_t  v;
1312         uint32_t fail_cnt = 0;
1313         uint32_t test_status;
1314
1315         for (v = 0; v < VFIFO_SIZE; ) {
1316                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n",
1317                            __func__, __LINE__, v);
1318                 test_status = rw_mgr_mem_calibrate_read_test_all_ranks
1319                         (grp, 1, PASS_ONE_BIT, bit_chk, 0);
1320                 if (!test_status) {
1321                         fail_cnt++;
1322
1323                         if (fail_cnt == 2)
1324                                 break;
1325                 }
1326
1327                 /* fiddle with FIFO */
1328                 rw_mgr_incr_vfifo(grp, &v);
1329         }
1330
1331         if (v >= VFIFO_SIZE) {
1332                 /* no failing read found!! Something must have gone wrong */
1333                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n",
1334                            __func__, __LINE__);
1335                 return 0;
1336         } else {
1337                 return v;
1338         }
1339 }
1340
1341 static int find_working_phase(uint32_t *grp, uint32_t *bit_chk,
1342                               uint32_t dtaps_per_ptap, uint32_t *work_bgn,
1343                               uint32_t *v, uint32_t *d, uint32_t *p,
1344                               uint32_t *i, uint32_t *max_working_cnt)
1345 {
1346         uint32_t found_begin = 0;
1347         uint32_t tmp_delay = 0;
1348         uint32_t test_status;
1349
1350         for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay +=
1351                 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1352                 *work_bgn = tmp_delay;
1353                 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1354
1355                 for (*i = 0; *i < VFIFO_SIZE; (*i)++) {
1356                         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn +=
1357                                 IO_DELAY_PER_OPA_TAP) {
1358                                 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1359
1360                                 test_status =
1361                                 rw_mgr_mem_calibrate_read_test_all_ranks
1362                                 (*grp, 1, PASS_ONE_BIT, bit_chk, 0);
1363
1364                                 if (test_status) {
1365                                         *max_working_cnt = 1;
1366                                         found_begin = 1;
1367                                         break;
1368                                 }
1369                         }
1370
1371                         if (found_begin)
1372                                 break;
1373
1374                         if (*p > IO_DQS_EN_PHASE_MAX)
1375                                 /* fiddle with FIFO */
1376                                 rw_mgr_incr_vfifo(*grp, v);
1377                 }
1378
1379                 if (found_begin)
1380                         break;
1381         }
1382
1383         if (*i >= VFIFO_SIZE) {
1384                 /* cannot find working solution */
1385                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\
1386                            ptap/dtap\n", __func__, __LINE__);
1387                 return 0;
1388         } else {
1389                 return 1;
1390         }
1391 }
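/*
 * Reading find_working_phase() above: the DQS-enable delay (d) is the
 * outer loop, the VFIFO position (i) the middle loop, and the DQS-enable
 * phase (p) the inner loop. When the first passing read is found,
 * work_bgn holds the accumulated delay, i.e.
 * d * IO_DELAY_PER_DQS_EN_DCHAIN_TAP + p * IO_DELAY_PER_OPA_TAP.
 */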
1392
1393 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk,
1394                              uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1395                              uint32_t *p, uint32_t *max_working_cnt)
1396 {
1397         uint32_t found_begin = 0;
1398         uint32_t tmp_delay;
1399
1400         /* Special case code for backing up a phase */
1401         if (*p == 0) {
1402                 *p = IO_DQS_EN_PHASE_MAX;
1403                 rw_mgr_decr_vfifo(*grp, v);
1404         } else {
1405                 (*p)--;
1406         }
1407         tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP;
1408         scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1409
1410         for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn;
1411                 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1412                 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
1413
1414                 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1415                                                              PASS_ONE_BIT,
1416                                                              bit_chk, 0)) {
1417                         found_begin = 1;
1418                         *work_bgn = tmp_delay;
1419                         break;
1420                 }
1421         }
1422
1423         /* We have found a working dtap before the ptap found above */
1424         if (found_begin == 1)
1425                 (*max_working_cnt)++;
1426
1427         /*
1428          * Restore VFIFO to old state before we decremented it
1429          * (if needed).
1430          */
1431         (*p)++;
1432         if (*p > IO_DQS_EN_PHASE_MAX) {
1433                 *p = 0;
1434                 rw_mgr_incr_vfifo(*grp, v);
1435         }
1436
1437         scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0);
1438 }
1439
1440 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk,
1441                              uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1442                              uint32_t *p, uint32_t *i, uint32_t *max_working_cnt,
1443                              uint32_t *work_end)
1444 {
1445         uint32_t found_end = 0;
1446
1447         (*p)++;
1448         *work_end += IO_DELAY_PER_OPA_TAP;
1449         if (*p > IO_DQS_EN_PHASE_MAX) {
1450                 /* fiddle with FIFO */
1451                 *p = 0;
1452                 rw_mgr_incr_vfifo(*grp, v);
1453         }
1454
1455         for (; *i < VFIFO_SIZE + 1; (*i)++) {
1456                 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end
1457                         += IO_DELAY_PER_OPA_TAP) {
1458                         scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);
1459
1460                         if (!rw_mgr_mem_calibrate_read_test_all_ranks
1461                                 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) {
1462                                 found_end = 1;
1463                                 break;
1464                         } else {
1465                                 (*max_working_cnt)++;
1466                         }
1467                 }
1468
1469                 if (found_end)
1470                         break;
1471
1472                 if (*p > IO_DQS_EN_PHASE_MAX) {
1473                         /* fiddle with FIFO */
1474                         rw_mgr_incr_vfifo(*grp, v);
1475                         *p = 0;
1476                 }
1477         }
1478
1479         if (*i >= VFIFO_SIZE + 1) {
1480                 /* cannot see edge of failing read */
1481                 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\
1482                            failed\n", __func__, __LINE__);
1483                 return 0;
1484         } else {
1485                 return 1;
1486         }
1487 }
1488
1489 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk,
1490                                   uint32_t *work_bgn, uint32_t *v, uint32_t *d,
1491                                   uint32_t *p, uint32_t *work_mid,
1492                                   uint32_t *work_end)
1493 {
1494         int i;
1495         int tmp_delay = 0;
1496
1497         *work_mid = (*work_bgn + *work_end) / 2;
1498
1499         debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1500                    *work_bgn, *work_end, *work_mid);
1501         /* Get the middle delay to be less than a VFIFO delay */
1502         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX;
1503                 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1504                 ;
1505         debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
1506         while (*work_mid > tmp_delay)
1507                 *work_mid -= tmp_delay;
1508         debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid);
1509
1510         tmp_delay = 0;
1511         for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid;
1512                 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
1513                 ;
1514         tmp_delay -= IO_DELAY_PER_OPA_TAP;
1515         debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay);
1516         for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++,
1517                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP)
1518                 ;
1519         debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay);
1520
1521         scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1);
1522         scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);
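        /*
         * Worked example with illustrative values (not the real platform
         * constants): IO_DELAY_PER_OPA_TAP = 416, IO_DQS_EN_PHASE_MAX = 7,
         * IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 25. One VFIFO cycle is then
         * 8 * 416 = 3328, so a work_mid of 4000 first reduces to 672. The
         * ptap loop exits with *p = 2 (tmp_delay 832, rewound to 416), so
         * phase *p - 1 = 1 is programmed, and the dtap loop tops up to
         * *d = 11 (416 + 11 * 25 = 691 >= 672).
         */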
1523
1524         /*
1525          * Push the vfifo until we can successfully calibrate. We can do
1526          * this because the largest possible margin is 1 VFIFO cycle.
1527          */
1528         for (i = 0; i < VFIFO_SIZE; i++) {
1529                 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n",
1530                            *v);
1531                 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
1532                                                              PASS_ONE_BIT,
1533                                                              bit_chk, 0)) {
1534                         break;
1535                 }
1536
1537                 /* fiddle with FIFO */
1538                 rw_mgr_incr_vfifo(*grp, v);
1539         }
1540
1541         if (i >= VFIFO_SIZE) {
1542                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \
1543                            failed\n", __func__, __LINE__);
1544                 return 0;
1545         } else {
1546                 return 1;
1547         }
1548 }
1549
1550 /* find a good dqs enable to use */
1551 static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp)
1552 {
1553         uint32_t v, d, p, i;
1554         uint32_t max_working_cnt;
1555         uint32_t bit_chk;
1556         uint32_t dtaps_per_ptap;
1557         uint32_t work_bgn, work_mid, work_end;
1558         uint32_t found_passing_read, found_failing_read, initial_failing_dtap;
1559
1560         debug("%s:%d %u\n", __func__, __LINE__, grp);
1561
1562         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1563
1564         scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
1565         scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);
1566
1567         /* ************************************************************** */
1568         /* * Step 0 : Determine number of delay taps for each phase tap * */
1569         dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
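        /*
         * Illustrative example (assumed values, not the actual platform
         * constants): with IO_DELAY_PER_OPA_TAP = 416 ps and
         * IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 25 ps, this integer division
         * gives dtaps_per_ptap = 16, i.e. roughly 16 delay-chain taps
         * span one phase tap.
         */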
1570
1571         /* ********************************************************* */
1572         /* * Step 1 : First push vfifo until we get a failing read * */
1573         v = find_vfifo_read(grp, &bit_chk);
1574
1575         max_working_cnt = 0;
1576
1577         /* ******************************************************** */
1578         /* * step 2: find first working phase, increment in ptaps * */
1579         work_bgn = 0;
1580         if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d,
1581                                 &p, &i, &max_working_cnt) == 0)
1582                 return 0;
1583
1584         work_end = work_bgn;
1585
1586         /*
1587          * If d is 0 then the working window covers a phase tap and
1588          * we can follow the old procedure; otherwise, we've found the
1589          * beginning, and we need to increment the dtaps until we find the end.
1590          */
1591         if (d == 0) {
1592                 /* ********************************************************* */
1593                 /* * step 3a: if we have room, back off by one and
1594                  * increment in dtaps * */
1595
1596                 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1597                                  &max_working_cnt);
1598
1599                 /* ********************************************************* */
1600                 /* * step 4a: go forward from working phase to non working
1601                  * phase, increment in ptaps * */
1602                 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1603                                          &i, &max_working_cnt, &work_end) == 0)
1604                         return 0;
1605
1606                 /* ********************************************************* */
1607                 /* * step 5a:  back off one from last, increment in dtaps  * */
1608
1609                 /* Special case code for backing up a phase */
1610                 if (p == 0) {
1611                         p = IO_DQS_EN_PHASE_MAX;
1612                         rw_mgr_decr_vfifo(grp, &v);
1613                 } else {
1614                         p = p - 1;
1615                 }
1616
1617                 work_end -= IO_DELAY_PER_OPA_TAP;
1618                 scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1619
1620                 /* * The actual increment of dtaps is done outside of
1621                  * the if/else loop to share code */
1622                 d = 0;
1623
1624                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \
1625                            vfifo=%u ptap=%u\n", __func__, __LINE__,
1626                            v, p);
1627         } else {
1628                 /* ******************************************************* */
1629                 /* * step 3-5b:  Find the right edge of the window using
1630                  * delay taps * */
1631                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \
1632                            ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__,
1633                            v, p, d, work_bgn);
1634
1635                 work_end = work_bgn;
1636
1637                 /* * The actual increment of dtaps is done outside of the
1638                  * if/else loop to share code */
1639
1640                 /* Only here to counterbalance a subtract later on which is
1641                  * not needed if this branch of the algorithm is taken */
1642                 max_working_cnt++;
1643         }
1644
1645         /* The dtap increment to find the failing edge is done here */
1646         for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end +=
1647                 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
1648                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \
1649                            end-2: dtap=%u\n", __func__, __LINE__, d);
1650                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1651
1652                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1653                                                               PASS_ONE_BIT,
1654                                                               &bit_chk, 0)) {
1655                         break;
1656                 }
1657         }
1658
1659         /* Go back to working dtap */
1660         if (d != 0)
1661                 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
1662
1663         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \
1664                    ptap=%u dtap=%u end=%u\n", __func__, __LINE__,
1665                    v, p, d-1, work_end);
1666
1667         if (work_end < work_bgn) {
1668                 /* nil range */
1669                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \
1670                            failed\n", __func__, __LINE__);
1671                 return 0;
1672         }
1673
1674         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n",
1675                    __func__, __LINE__, work_bgn, work_end);
1676
1677         /* *************************************************************** */
1678         /*
1679          * * We need to calculate the number of dtaps that equal a ptap
1680          * * To do that we'll back up a ptap and re-find the edge of the
1681          * * window using dtaps
1682          */
1683
1684         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \
1685                    for tracking\n", __func__, __LINE__);
1686
1687         /* Special case code for backing up a phase */
1688         if (p == 0) {
1689                 p = IO_DQS_EN_PHASE_MAX;
1690                 rw_mgr_decr_vfifo(grp, &v);
1691                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \
1692                            cycle/phase: v=%u p=%u\n", __func__, __LINE__,
1693                            v, p);
1694         } else {
1695                 p = p - 1;
1696                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \
1697                            phase only: v=%u p=%u", __func__, __LINE__,
1698                            v, p);
1699         }
1700
1701         scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
1702
1703         /*
1704          * Increase dtap until we first see a passing read (in case the
1705          * window is smaller than a ptap),
1706          * and then a failing read to mark the edge of the window again
1707          */
1708
1709         /* Find a passing read */
1710         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n",
1711                    __func__, __LINE__);
1712         found_passing_read = 0;
1713         found_failing_read = 0;
1714         initial_failing_dtap = d;
1715         for (; d <= IO_DQS_EN_DELAY_MAX; d++) {
1716                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \
1717                            read d=%u\n", __func__, __LINE__, d);
1718                 scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1719
1720                 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
1721                                                              PASS_ONE_BIT,
1722                                                              &bit_chk, 0)) {
1723                         found_passing_read = 1;
1724                         break;
1725                 }
1726         }
1727
1728         if (found_passing_read) {
1729                 /* Find a failing read */
1730                 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \
1731                            read\n", __func__, __LINE__);
1732                 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) {
1733                         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \
1734                                    testing read d=%u\n", __func__, __LINE__, d);
1735                         scc_mgr_set_dqs_en_delay_all_ranks(grp, d);
1736
1737                         if (!rw_mgr_mem_calibrate_read_test_all_ranks
1738                                 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) {
1739                                 found_failing_read = 1;
1740                                 break;
1741                         }
1742                 }
1743         } else {
1744                 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \
1745                            calculate dtaps", __func__, __LINE__);
1746                 debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n");
1747         }
1748
1749         /*
1750          * The dynamically calculated dtaps_per_ptap is only valid if we
1751          * found both a passing and a failing read. If we didn't, it means
1752          * d hit the max (IO_DQS_EN_DELAY_MAX) and dtaps_per_ptap retains
1753          * its statically calculated value.
1754          */
1755         if (found_passing_read && found_failing_read)
1756                 dtaps_per_ptap = d - initial_failing_dtap;
1757
1758         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
1759         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \
1760                    - %u = %u",  __func__, __LINE__, d,
1761                    initial_failing_dtap, dtaps_per_ptap);
1762
1763         /* ******************************************** */
1764         /* * step 6:  Find the centre of the window   * */
1765         if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p,
1766                                    &work_mid, &work_end) == 0)
1767                 return 0;
1768
1769         debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \
1770                    vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__,
1771                    v, p-1, d);
1772         return 1;
1773 }
1774
1775 /*
1776  * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
1777  * dq_in_delay values
1778  */
1779 static uint32_t
1780 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
1781 (uint32_t write_group, uint32_t read_group, uint32_t test_bgn)
1782 {
1783         uint32_t found;
1784         uint32_t i;
1785         uint32_t p;
1786         uint32_t d;
1787         uint32_t r;
1788
1789         const uint32_t delay_step = IO_IO_IN_DELAY_MAX /
1790                 (RW_MGR_MEM_DQ_PER_READ_DQS-1);
1791                 /* we start at zero, so have one less dq to divide among */
1792
1793         debug("%s:%d (%u,%u,%u)", __func__, __LINE__, write_group, read_group,
1794               test_bgn);
1795
1796         /* try different dq_in_delays since the dq path is shorter than dqs */
1797
1798         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1799              r += NUM_RANKS_PER_SHADOW_REG) {
1800                 for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS;
1801                         i++, p++, d += delay_step) {
1802                         debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\
1803                                    vfifo_find_dqs_", __func__, __LINE__);
1804                         debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ",
1805                                write_group, read_group);
1806                         debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i , p, d);
1807                         scc_mgr_set_dq_in_delay(p, d);
1808                         scc_mgr_load_dq(p);
1809                 }
1810                 writel(0, &sdr_scc_mgr->update);
1811         }
1812
1813         found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group);
1814
1815         debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\
1816                    en_phase_sweep_dq", __func__, __LINE__);
1817         debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Resetting delay \
1818                    chain to zero\n", write_group, read_group, found);
1819
1820         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
1821              r += NUM_RANKS_PER_SHADOW_REG) {
1822                 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS;
1823                         i++, p++) {
1824                         scc_mgr_set_dq_in_delay(p, 0);
1825                         scc_mgr_load_dq(p);
1826                 }
1827                 writel(0, &sdr_scc_mgr->update);
1828         }
1829
1830         return found;
1831 }
1832
1833 /* per-bit deskew DQ and center */
1834 static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn,
1835         uint32_t write_group, uint32_t read_group, uint32_t test_bgn,
1836         uint32_t use_read_test, uint32_t update_fom)
1837 {
1838         uint32_t i, p, d, min_index;
1839         uint32_t bit_chk;
1840         uint32_t sticky_bit_chk;
1841         /*
1842          * Store these as signed since there are comparisons with
1843          * signed numbers.
1844          */
1845         int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1846         int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS];
1847         int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS];
1848         int32_t mid;
1849         int32_t orig_mid_min, mid_min;
1850         int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs,
1851                 final_dqs_en;
1852         int32_t dq_margin, dqs_margin;
1853         uint32_t stop;
1854         uint32_t temp_dq_in_delay1, temp_dq_in_delay2;
1855         uint32_t addr;
1856
1857         debug("%s:%d: %u %u", __func__, __LINE__, read_group, test_bgn);
1858
1859         addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET;
1860         start_dqs = readl(addr + (read_group << 2));
1861         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
1862                 start_dqs_en = readl(addr + ((read_group << 2)
1863                                      - IO_DQS_EN_DELAY_OFFSET));
1864
1865         /* set the left and right edge of each bit to an illegal value */
1866         /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */
1867         sticky_bit_chk = 0;
1868         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1869                 left_edge[i]  = IO_IO_IN_DELAY_MAX + 1;
1870                 right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1871         }
1872
1873         /* Search for the left edge of the window for each bit */
1874         for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) {
1875                 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d);
1876
1877                 writel(0, &sdr_scc_mgr->update);
1878
1879                 /*
1880                  * Stop searching when the read test doesn't pass AND when
1881                  * we've seen a passing read on every bit.
1882                  */
1883                 if (use_read_test) {
1884                         stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1885                                 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1886                                 &bit_chk, 0, 0);
1887                 } else {
1888                         rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1889                                                         0, PASS_ONE_BIT,
1890                                                         &bit_chk, 0);
1891                         bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1892                                 (read_group - (write_group *
1893                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
1894                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1895                         stop = (bit_chk == 0);
1896                 }
1897                 sticky_bit_chk = sticky_bit_chk | bit_chk;
1898                 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1899                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \
1900                            && %u", __func__, __LINE__, d,
1901                            sticky_bit_chk,
1902                            param->read_correct_mask, stop);
1903
1904                 if (stop == 1) {
1905                         break;
1906                 } else {
1907                         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
1908                                 if (bit_chk & 1) {
1909                                         /* Remember a passing test as the
1910                                         left_edge */
1911                                         left_edge[i] = d;
1912                                 } else {
1913                                         /* If a left edge has not been seen yet,
1914                                         then a future passing test will mark
1915                                         this edge as the right edge */
1916                                         if (left_edge[i] ==
1917                                                 IO_IO_IN_DELAY_MAX + 1) {
1918                                                 right_edge[i] = -(d + 1);
1919                                         }
1920                                 }
1921                                 bit_chk = bit_chk >> 1;
1922                         }
1923                 }
1924         }
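        /*
         * Illustrative trace of the edge bookkeeping above: a bit that
         * fails at d = 0, 1, 2 and first passes at d = 3 has right_edge[i]
         * set to -1, then -2, then -3 while left_edge[i] is still illegal;
         * at d = 3, left_edge[i] becomes 3 and right_edge[i] stays -3,
         * recording that the failing region preceded the sweep.
         */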
1925
1926         /* Reset DQ delay chains to 0 */
1927         scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, 0);
1928         sticky_bit_chk = 0;
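        /*
         * Walk the bits from the last one down to 0. The loop condition is
         * left empty, with an explicit break at i == 0, because i is
         * unsigned and a conventional i >= 0 test would always be true.
         */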
1929         for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) {
1930                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \
1931                            %d right_edge[%u]: %d\n", __func__, __LINE__,
1932                            i, left_edge[i], i, right_edge[i]);
1933
1934                 /*
1935                  * Check for cases where we haven't found the left edge,
1936                  * which makes our assignment of the right edge invalid.
1937                  * Reset it to the illegal value.
1938                  */
1939                 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && (
1940                         right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1941                         right_edge[i] = IO_IO_IN_DELAY_MAX + 1;
1942                         debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \
1943                                    right_edge[%u]: %d\n", __func__, __LINE__,
1944                                    i, right_edge[i]);
1945                 }
1946
1947                 /*
1948                  * Reset sticky bit (except for bits where we have seen
1949                  * both the left and right edge).
1950                  */
1951                 sticky_bit_chk = sticky_bit_chk << 1;
1952                 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) &&
1953                     (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) {
1954                         sticky_bit_chk = sticky_bit_chk | 1;
1955                 }
1956
1957                 if (i == 0)
1958                         break;
1959         }
1960
1961         /* Search for the right edge of the window for each bit */
1962         for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) {
1963                 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
1964                 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
1965                         uint32_t delay = d + start_dqs_en;
1966                         if (delay > IO_DQS_EN_DELAY_MAX)
1967                                 delay = IO_DQS_EN_DELAY_MAX;
1968                         scc_mgr_set_dqs_en_delay(read_group, delay);
1969                 }
1970                 scc_mgr_load_dqs(read_group);
1971
1972                 writel(0, &sdr_scc_mgr->update);
1973
1974                 /*
1975                  * Stop searching when the read test doesn't pass AND when
1976                  * we've seen a passing read on every bit.
1977                  */
1978                 if (use_read_test) {
1979                         stop = !rw_mgr_mem_calibrate_read_test(rank_bgn,
1980                                 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT,
1981                                 &bit_chk, 0, 0);
1982                 } else {
1983                         rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
1984                                                         0, PASS_ONE_BIT,
1985                                                         &bit_chk, 0);
1986                         bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS *
1987                                 (read_group - (write_group *
1988                                         RW_MGR_MEM_IF_READ_DQS_WIDTH /
1989                                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH)));
1990                         stop = (bit_chk == 0);
1991                 }
1992                 sticky_bit_chk = sticky_bit_chk | bit_chk;
1993                 stop = stop && (sticky_bit_chk == param->read_correct_mask);
1994
1995                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \
1996                            %u && %u", __func__, __LINE__, d,
1997                            sticky_bit_chk, param->read_correct_mask, stop);
1998
1999                 if (stop == 1) {
2000                         break;
2001                 } else {
2002                         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2003                                 if (bit_chk & 1) {
2004                                         /* Remember a passing test as
2005                                         the right_edge */
2006                                         right_edge[i] = d;
2007                                 } else {
2008                                         if (d != 0) {
2009                                                 /* If a right edge has not been
2010                                                 seen yet, then a future passing
2011                                                 test will mark this edge as the
2012                                                 left edge */
2013                                                 if (right_edge[i] ==
2014                                                 IO_IO_IN_DELAY_MAX + 1) {
2015                                                         left_edge[i] = -(d + 1);
2016                                                 }
2017                                         } else {
2018                                                 /* d = 0 failed, but it passed
2019                                                 when testing the left edge,
2020                                                 so it must be marginal,
2021                                                 set it to -1 */
2022                                                 if (right_edge[i] ==
2023                                                         IO_IO_IN_DELAY_MAX + 1 &&
2024                                                         left_edge[i] !=
2025                                                         IO_IO_IN_DELAY_MAX
2026                                                         + 1) {
2027                                                         right_edge[i] = -1;
2028                                                 }
2029                                                 /* If a right edge has not been
2030                                                 seen yet, then a future passing
2031                                                 test will mark this edge as the
2032                                                 left edge */
2033                                                 else if (right_edge[i] ==
2034                                                         IO_IO_IN_DELAY_MAX +
2035                                                         1) {
2036                                                         left_edge[i] = -(d + 1);
2037                                                 }
2038                                         }
2039                                 }
2040
2041                                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\
2042                                            d=%u]: ", __func__, __LINE__, d);
2043                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ",
2044                                            (int)(bit_chk & 1), i, left_edge[i]);
2045                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2046                                            right_edge[i]);
2047                                 bit_chk = bit_chk >> 1;
2048                         }
2049                 }
2050         }
2051
2052         /* Check that all bits have a window */
2053         for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2054                 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \
2055                            %d right_edge[%u]: %d", __func__, __LINE__,
2056                            i, left_edge[i], i, right_edge[i]);
2057                 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i]
2058                         == IO_IO_IN_DELAY_MAX + 1)) {
2059                         /*
2060                          * Restore delay chain settings before letting the loop
2061                          * in rw_mgr_mem_calibrate_vfifo to retry different
2062                          * dqs/ck relationships.
2063                          */
2064                         scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs);
2065                         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2066                                 scc_mgr_set_dqs_en_delay(read_group,
2067                                                          start_dqs_en);
2068                         }
2069                         scc_mgr_load_dqs(read_group);
2070                         writel(0, &sdr_scc_mgr->update);
2071
2072                         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \
2073                                    find edge [%u]: %d %d", __func__, __LINE__,
2074                                    i, left_edge[i], right_edge[i]);
2075                         if (use_read_test) {
2076                                 set_failing_group_stage(read_group *
2077                                         RW_MGR_MEM_DQ_PER_READ_DQS + i,
2078                                         CAL_STAGE_VFIFO,
2079                                         CAL_SUBSTAGE_VFIFO_CENTER);
2080                         } else {
2081                                 set_failing_group_stage(read_group *
2082                                         RW_MGR_MEM_DQ_PER_READ_DQS + i,
2083                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2084                                         CAL_SUBSTAGE_VFIFO_CENTER);
2085                         }
2086                         return 0;
2087                 }
2088         }
2089
2090         /* Find middle of window for each DQ bit */
2091         mid_min = left_edge[0] - right_edge[0];
2092         min_index = 0;
2093         for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
2094                 mid = left_edge[i] - right_edge[i];
2095                 if (mid < mid_min) {
2096                         mid_min = mid;
2097                         min_index = i;
2098                 }
2099         }
2100
2101         /*
2102          * -mid_min/2 represents the amount that we need to move DQS.
2103          * If mid_min is odd and positive we'll need to add one to
2104          * make sure the rounding in further calculations is correct
2105          * (always bias to the right), so just add 1 for all positive values.
2106          */
2107         if (mid_min > 0)
2108                 mid_min++;
2109
2110         mid_min = mid_min / 2;
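        /*
         * Illustrative example: if the worst bit has left_edge = 5 and
         * right_edge = 2, mid_min starts at 3; being positive (and odd) it
         * is bumped to 4 and halved to 2, so DQS is nominally moved by 2
         * taps (new_dqs = start_dqs - 2 below, barring clamping).
         */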
2111
2112         debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n",
2113                    __func__, __LINE__, mid_min, min_index);
2114
2115         /* Determine the amount we can change DQS (which is -mid_min) */
2116         orig_mid_min = mid_min;
2117         new_dqs = start_dqs - mid_min;
2118         if (new_dqs > IO_DQS_IN_DELAY_MAX)
2119                 new_dqs = IO_DQS_IN_DELAY_MAX;
2120         else if (new_dqs < 0)
2121                 new_dqs = 0;
2122
2123         mid_min = start_dqs - new_dqs;
2124         debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2125                    mid_min, new_dqs);
2126
2127         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2128                 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX)
2129                         mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX;
2130                 else if (start_dqs_en - mid_min < 0)
2131                         mid_min += start_dqs_en - mid_min;
2132         }
2133         new_dqs = start_dqs - mid_min;
2134
2135         debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \
2136                    new_dqs=%d mid_min=%d\n", start_dqs,
2137                    IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1,
2138                    new_dqs, mid_min);
2139
2140         /* Initialize data for export structures */
2141         dqs_margin = IO_IO_IN_DELAY_MAX + 1;
2142         dq_margin  = IO_IO_IN_DELAY_MAX + 1;
2143
2144         /* add delay to bring centre of all DQ windows to the same "level" */
2145         for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
2146                 /* Use values before divide by 2 to reduce round off error */
2147                 shift_dq = (left_edge[i] - right_edge[i] -
2148                         (left_edge[min_index] - right_edge[min_index]))/2  +
2149                         (orig_mid_min - mid_min);
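                /*
                 * Illustrative example: if this bit's window gives
                 * left_edge - right_edge = 3 while the narrowest bit's
                 * gives 1, and mid_min was not clamped (orig_mid_min ==
                 * mid_min), then shift_dq = (3 - 1) / 2 = 1: this DQ gets
                 * one extra tap so its window centre lines up with the
                 * narrowest bit's.
                 */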
2150
2151                 debug_cond(DLEVEL == 2, "vfifo_center: before: \
2152                            shift_dq[%u]=%d\n", i, shift_dq);
2153
2154                 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET;
2155                 temp_dq_in_delay1 = readl(addr + (p << 2));
2156                 temp_dq_in_delay2 = readl(addr + (i << 2));
2157
2158                 if (shift_dq + (int32_t)temp_dq_in_delay1 >
2159                         (int32_t)IO_IO_IN_DELAY_MAX) {
2160                         shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2;
2161                 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) {
2162                         shift_dq = -(int32_t)temp_dq_in_delay1;
2163                 }
2164                 debug_cond(DLEVEL == 2, "vfifo_center: after: \
2165                            shift_dq[%u]=%d\n", i, shift_dq);
2166                 final_dq[i] = temp_dq_in_delay1 + shift_dq;
2167                 scc_mgr_set_dq_in_delay(p, final_dq[i]);
2168                 scc_mgr_load_dq(p);
2169
2170                 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i,
2171                            left_edge[i] - shift_dq + (-mid_min),
2172                            right_edge[i] + shift_dq - (-mid_min));
2173                 /* To determine values for export structures */
2174                 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2175                         dq_margin = left_edge[i] - shift_dq + (-mid_min);
2176
2177                 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2178                         dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2179         }
2180
2181         final_dqs = new_dqs;
2182         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
2183                 final_dqs_en = start_dqs_en - mid_min;
2184
2185         /* Move DQS-en */
2186         if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
2187                 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en);
2188                 scc_mgr_load_dqs(read_group);
2189         }
2190
2191         /* Move DQS */
2192         scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs);
2193         scc_mgr_load_dqs(read_group);
2194         debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \
2195                    dqs_margin=%d", __func__, __LINE__,
2196                    dq_margin, dqs_margin);
2197
2198         /*
2199          * Do not remove this line as it makes sure all of our decisions
2200          * have been applied. Apply the update bit.
2201          */
2202         writel(0, &sdr_scc_mgr->update);
2203
2204         return (dq_margin >= 0) && (dqs_margin >= 0);
2205 }
2206
2207 /*
2208  * Calibrate the read valid prediction FIFO.
2209  *
2210  *  - Read valid prediction consists of finding a good DQS enable phase,
2211  *    DQS enable delay, DQS input phase, and DQS input delay.
2212  *  - We also do a per-bit deskew on the DQ lines.
2213  */
2214 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
2215                                            uint32_t test_bgn)
2216 {
2217         uint32_t p, d, rank_bgn, sr;
2218         uint32_t dtaps_per_ptap;
2219         uint32_t tmp_delay;
2220         uint32_t bit_chk;
2221         uint32_t grp_calibrated;
2222         uint32_t write_group, write_test_bgn;
2223         uint32_t failed_substage;
2224
2225         debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn);
2226
2227         /* update info for sims */
2228         reg_file_set_stage(CAL_STAGE_VFIFO);
2229
2230         write_group = read_group;
2231         write_test_bgn = test_bgn;
2232
2233         /* USER Determine number of delay taps for each phase tap */
2234         dtaps_per_ptap = 0;
2235         tmp_delay = 0;
2236         while (tmp_delay < IO_DELAY_PER_OPA_TAP) {
2237                 dtaps_per_ptap++;
2238                 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
2239         }
2240         dtaps_per_ptap--;
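        /*
         * Illustrative example (assumed values): with IO_DELAY_PER_OPA_TAP
         * = 416 and IO_DELAY_PER_DQS_EN_DCHAIN_TAP = 25, the loop runs 17
         * times (17 * 25 = 425 >= 416) and the decrement leaves
         * dtaps_per_ptap = 16, matching the integer division used in
         * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase().
         */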
2241         tmp_delay = 0;
2242
2243         /* update info for sims */
2244         reg_file_set_group(read_group);
2245
2246         grp_calibrated = 0;
2247
2248         reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2249         failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2250
2251         for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) {
2252                 /*
2253                  * In RLDRAMX we may be messing with the delay of pins in
2254                  * the same write group but outside of the current read
2255                  * group, but that's OK because we haven't calibrated the
2256                  * output side yet.
2257                  */
2258                 if (d > 0) {
2259                         scc_mgr_apply_group_all_out_delay_add_all_ranks
2260                         (write_group, write_test_bgn, d);
2261                 }
2262
2263                 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0;
2264                         p++) {
2265                         /* set a particular dqdqs phase */
2266                         scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p);
2267
2268                         debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u \
2269                                    p=%u d=%u\n", __func__, __LINE__,
2270                                    read_group, p, d);
2271
2272                         /*
2273                          * Load up the patterns used by read calibration
2274                          * using current DQDQS phase.
2275                          */
2276                         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2277                         if (!(gbl->phy_debug_mode_flags &
2278                                 PHY_DEBUG_DISABLE_GUARANTEED_READ)) {
2279                                 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks
2280                                     (read_group, 1, &bit_chk)) {
2281                                         debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:",
2282                                                    __func__, __LINE__);
2283                                         debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n",
2284                                                    read_group, p, d);
2285                                         break;
2286                                 }
2287                         }
2288
2289                         /* case:56390 */
2290                         grp_calibrated = 1;
2291                         if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
2292                             (write_group, read_group, test_bgn)) {
2293                                 /*
2294                                  * USER Read per-bit deskew can be done on a
2295                                  * per shadow register basis.
2296                                  */
2297                                 for (rank_bgn = 0, sr = 0;
2298                                         rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2299                                         rank_bgn += NUM_RANKS_PER_SHADOW_REG,
2300                                         ++sr) {
2301                                         /*
2302                                          * Determine if this set of ranks
2303                                          * should be skipped entirely.
2304                                          */
2305                                         if (!param->skip_shadow_regs[sr]) {
2306                                                 /*
2307                                                  * If doing read after write
2308                                                  * calibration, do not update
2309                                                  * FOM, now - do it then.
2310                                                  */
2311                                                 if (!rw_mgr_mem_calibrate_vfifo_center
2312                                                     (rank_bgn, write_group,
2313                                                      read_group, test_bgn,
2314                                                      1, 0)) {
2315                                                         grp_calibrated = 0;
2316                                                         failed_substage =
2317                                                             CAL_SUBSTAGE_VFIFO_CENTER;
2318                                                 }
2318                                         }
2319                                 }
2320                         } else {
2321                                 grp_calibrated = 0;
2322                                 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2323                         }
2324                 }
2325         }
2326
2327         if (grp_calibrated == 0) {
2328                 set_failing_group_stage(write_group, CAL_STAGE_VFIFO,
2329                                         failed_substage);
2330                 return 0;
2331         }
2332
2333         /*
2334          * Reset the delay chains back to zero if they have moved > 1
2335          * (check for > 1 because the loop increments d even when the
2336          * first case passes).
2337          */
2338         if (d > 2)
2339                 scc_mgr_zero_group(write_group, write_test_bgn, 1);
2340
2341         return 1;
2342 }
2343
2344 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */
2345 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group,
2346                                                uint32_t test_bgn)
2347 {
2348         uint32_t rank_bgn, sr;
2349         uint32_t grp_calibrated;
2350         uint32_t write_group;
2351
2352         debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn);
2353
2354         /* update info for sims */
2355
2356         reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2357         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2358
2359         write_group = read_group;
2360
2361         /* update info for sims */
2362         reg_file_set_group(read_group);
2363
2364         grp_calibrated = 1;
2365         /* Read per-bit deskew can be done on a per shadow register basis */
2366         for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
2367                 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) {
2368                 /* Determine if this set of ranks should be skipped entirely */
2369                 if (!param->skip_shadow_regs[sr]) {
2370                 /* This is the last calibration round, update FOM here */
2371                         if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn,
2372                                                                 write_group,
2373                                                                 read_group,
2374                                                                 test_bgn, 0,
2375                                                                 1)) {
2376                                 grp_calibrated = 0;
2377                         }
2378                 }
2379         }
2380
2381
2382         if (grp_calibrated == 0) {
2383                 set_failing_group_stage(write_group,
2384                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2385                                         CAL_SUBSTAGE_VFIFO_CENTER);
2386                 return 0;
2387         }
2388
2389         return 1;
2390 }
2391
2392 /* Calibrate LFIFO to find smallest read latency */
2393 static uint32_t rw_mgr_mem_calibrate_lfifo(void)
2394 {
2395         uint32_t found_one;
2396         uint32_t bit_chk;
2397
2398         debug("%s:%d\n", __func__, __LINE__);
2399
2400         /* update info for sims */
2401         reg_file_set_stage(CAL_STAGE_LFIFO);
2402         reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2403
2404         /* Load up the patterns used by read calibration for all ranks */
2405         rw_mgr_mem_calibrate_read_load_patterns(0, 1);
2406         found_one = 0;
2407
2408         do {
2409                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2410                 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u",
2411                            __func__, __LINE__, gbl->curr_read_lat);
2412
2413                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
2414                                                               NUM_READ_TESTS,
2415                                                               PASS_ALL_BITS,
2416                                                               &bit_chk, 1)) {
2417                         break;
2418                 }
2419
2420                 found_one = 1;
2421                 /* reduce read latency and see if things are working correctly */
2423                 gbl->curr_read_lat--;
2424         } while (gbl->curr_read_lat > 0);
2425
2426         /* reset the fifos to get pointers to known state */
2427
2428         writel(0, &phy_mgr_cmd->fifo_reset);
2429
2430         if (found_one) {
2431                 /* add a fudge factor to the read latency that was determined */
2432                 gbl->curr_read_lat += 2;
2433                 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
2434                 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \
2435                            read_lat=%u\n", __func__, __LINE__,
2436                            gbl->curr_read_lat);
2437                 return 1;
2438         } else {
2439                 set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
2440                                         CAL_SUBSTAGE_READ_LATENCY);
2441
2442                 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \
2443                            read_lat=%u\n", __func__, __LINE__,
2444                            gbl->curr_read_lat);
2445                 return 0;
2446         }
2447 }
2448
2449 /*
2450  * Issue a write test command.
2451  * Two variants are provided: one that just tests a write pattern, and
2452  * another that tests datamask functionality.
2453  */
2454 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group,
2455                                                   uint32_t test_dm)
2456 {
2457         uint32_t mcc_instruction;
2458         uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) &&
2459                 ENABLE_SUPER_QUICK_CALIBRATION);
2460         uint32_t rw_wl_nop_cycles;
2461         uint32_t addr;
2462
2463         /*
2464          * Set counter and jump addresses for the right
2465          * number of NOP cycles.
2466          * The number of supported NOP cycles can range from -1 to infinity
2467          * Three different cases are handled:
2468          *
2469          * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
2470          *    mechanism will be used to insert the right number of NOPs
2471          *
2472          * 2. For a number of NOP cycles equal to 0, the micro-instruction
2473          *    issuing the write command will jump straight to the
2474          *    micro-instruction that turns on DQS (for DDRx), or outputs
2475          *    write data (for RLD), skipping the NOP micro-instruction
2476          *    altogether.
2477          *
2478          * 3. A number of NOP cycles equal to -1 indicates that DQS must be
2479          *    turned on in the same micro-instruction that issues the write
2480          *    command; we then need to jump directly to the micro-instruction
2481          *    that sends out the data.
2482          *
2483          * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
2484          *       (2 and 3). One jump-counter (0) is used to perform multiple
2485          *       write-read operations; one counter is left to issue this
2486          *       command in "multiple-group" mode.
2487          */
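        /*
         * Condensed view of the three cases handled below, for quick
         * reference (derived from the code itself):
         *
         *   rw_wl_nop_cycles == -1: CNTR2 = 0xFF, jump2 -> ..._DATA,
         *                           jump3 -> ..._NOP, use the _WL_1 insn
         *   rw_wl_nop_cycles ==  0: CNTR2 = 0xFF, jump2 -> ..._DQS
         *   rw_wl_nop_cycles  >  0: CNTR2 = 0, CNTR3 = cycles - 1,
         *                           jump3 -> ..._NOP
         */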
2488
2489         rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;
2490
2491         if (rw_wl_nop_cycles == -1) {
2492                 /*
2493                  * CNTR 2 - We want to execute the special write operation that
2494                  * turns on DQS right away and then skip directly to the
2495                  * instruction that sends out the data. We set the counter to a
2496                  * large number so that the jump is always taken.
2497                  */
2498                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2499
2500                 /* CNTR 3 - Not used */
2501                 if (test_dm) {
2502                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1;
2503                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA,
2504                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2505                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2506                                &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2507                 } else {
2508                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1;
2509                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA,
2510                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2511                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2512                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2513                 }
2514         } else if (rw_wl_nop_cycles == 0) {
2515                 /*
2516                  * CNTR 2 - We want to skip the NOP operation and go straight
2517                  * to the DQS enable instruction. We set the counter to a large
2518                  * number so that the jump is always taken.
2519                  */
2520                 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
2521
2522                 /* CNTR 3 - Not used */
2523                 if (test_dm) {
2524                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2525                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS,
2526                                &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2527                 } else {
2528                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2529                         writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS,
2530                                 &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2531                 }
2532         } else {
2533                 /*
2534                  * CNTR 2 - In this case we want to execute the next instruction
2535                  * and NOT take the jump. So we set the counter to 0. The jump
2536                  * address doesn't count.
2537                  */
2538                 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
2539                 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
2540
2541                 /*
2542                  * CNTR 3 - Set the nop counter to the number of cycles we
2543                  * need to loop for, minus 1.
2544                  */
2545                 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
2546                 if (test_dm) {
2547                         mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
2548                         writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
2549                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2550                 } else {
2551                         mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
2552                         writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
2553                                 &sdr_rw_load_jump_mgr_regs->load_jump_add3);
2554                 }
2555         }
2556
2557         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
2558                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
2559
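        /*
         * A brief note (illustrative): quick write mode programs fewer
         * write-read iterations (0x08 vs 0x40 in counter 0), trading
         * test coverage for calibration speed; the exact iteration
         * count per counter value is RW manager specific.
         */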
2560         if (quick_write_mode)
2561                 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
2562         else
2563                 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
2564
2565         writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
2566
2567         /*
2568          * CNTR 1 - This is used to ensure enough time elapses
2569          * for read data to come back.
2570          */
2571         writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
2572
2573         if (test_dm) {
2574                 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT,
2575                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2576         } else {
2577                 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT,
2578                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
2579         }
2580
2581         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
2582         writel(mcc_instruction, addr + (group << 2));
2583 }
2584
/* Test writes; can check for a single-bit pass or a pass on all bits */
2586 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
2587         uint32_t write_group, uint32_t use_dm, uint32_t all_correct,
2588         uint32_t *bit_chk, uint32_t all_ranks)
2589 {
2590         uint32_t r;
2591         uint32_t correct_mask_vg;
2592         uint32_t tmp_bit_chk;
2593         uint32_t vg;
2594         uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
2595                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
2596         uint32_t addr_rw_mgr;
2597         uint32_t base_rw_mgr;
2598
2599         *bit_chk = param->write_correct_mask;
2600         correct_mask_vg = param->write_correct_mask_vg;
2601
2602         for (r = rank_bgn; r < rank_end; r++) {
2603                 if (param->skip_ranks[r]) {
2604                         /* request to skip the rank */
2605                         continue;
2606                 }
2607
2608                 /* set rank */
2609                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
2610
2611                 tmp_bit_chk = 0;
2612                 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS;
                for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS - 1;; vg--) {
2614                         /* reset the fifos to get pointers to known state */
2615                         writel(0, &phy_mgr_cmd->fifo_reset);
2616
2617                         tmp_bit_chk = tmp_bit_chk <<
2618                                 (RW_MGR_MEM_DQ_PER_WRITE_DQS /
2619                                 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
                        rw_mgr_mem_calibrate_write_test_issue(write_group *
                                RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS + vg,
                                use_dm);
2623
2624                         base_rw_mgr = readl(addr_rw_mgr);
2625                         tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));
2626                         if (vg == 0)
2627                                 break;
2628                 }
2629                 *bit_chk &= tmp_bit_chk;
2630         }
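
        /*
         * Worked example (illustrative, assumed geometry): with
         * RW_MGR_MEM_DQ_PER_WRITE_DQS = 8 and
         * RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS = 2, each pass of the
         * vg loop shifts tmp_bit_chk left by 4 and ORs in the four
         * per-bit pass flags for that virtual group, so tmp_bit_chk
         * ends up with one pass/fail bit per DQ. ANDing into *bit_chk
         * keeps only the bits that passed on every rank.
         */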
2631
2632         if (all_correct) {
2633                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "write_test(%u,%u,ALL) : %u == %u => %lu",
                           write_group, use_dm, *bit_chk,
                           param->write_correct_mask,
                           (long unsigned int)(*bit_chk ==
                           param->write_correct_mask));
2639                 return *bit_chk == param->write_correct_mask;
2640         } else {
2641                 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
                debug_cond(DLEVEL == 2,
                           "write_test(%u,%u,ONE) : %u != %lu => %lu",
                           write_group, use_dm, *bit_chk,
                           (long unsigned int)0,
                           (long unsigned int)(*bit_chk != 0));
2646                 return *bit_chk != 0x00;
2647         }
2648 }
2649
/*
 * Center all windows. Do per-bit deskew to possibly increase the size of
 * certain windows.
 */
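/*
 * A sketch of the flow below (not authoritative): sweep DQ out1 delays
 * to find each bit's left edge, sweep DQS/OCT delays to find the right
 * edges, centre every DQ window on the narrowest one, then repeat the
 * two sweeps for the DM bit and centre it as well.
 */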
2654 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
2655         uint32_t write_group, uint32_t test_bgn)
2656 {
2657         uint32_t i, p, min_index;
2658         int32_t d;
        uint32_t bit_chk;
        uint32_t sticky_bit_chk;
        /*
         * Store these as signed, since there are comparisons with
         * signed numbers.
         */
        int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
        int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
2667         int32_t mid;
2668         int32_t mid_min, orig_mid_min;
2669         int32_t new_dqs, start_dqs, shift_dq;
2670         int32_t dq_margin, dqs_margin, dm_margin;
2671         uint32_t stop;
2672         uint32_t temp_dq_out1_delay;
2673         uint32_t addr;
2674
        debug("%s:%d %u %u\n", __func__, __LINE__, write_group, test_bgn);
2676
2677         dm_margin = 0;
2678
2679         addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2680         start_dqs = readl(addr +
2681                           (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
2682
2683         /* per-bit deskew */
2684
        /*
         * Set the left and right edge of each bit to an illegal value;
         * use (IO_IO_OUT1_DELAY_MAX + 1) as the illegal value.
         */
2689         sticky_bit_chk = 0;
2690         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2691                 left_edge[i]  = IO_IO_OUT1_DELAY_MAX + 1;
2692                 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
2693         }
2694
2695         /* Search for the left edge of the window for each bit */
2696         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) {
2697                 scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, d);
2698
2699                 writel(0, &sdr_scc_mgr->update);
2700
                /*
                 * Stop searching when the write test doesn't pass AND when
                 * we've seen a passing write on every bit.
                 */
2705                 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2706                         0, PASS_ONE_BIT, &bit_chk, 0);
2707                 sticky_bit_chk = sticky_bit_chk | bit_chk;
2708                 stop = stop && (sticky_bit_chk == param->write_correct_mask);
                debug_cond(DLEVEL == 2,
                           "write_center(left): dtap=%d => %u == %u && %u [bit_chk=%u]\n",
                           d, sticky_bit_chk, param->write_correct_mask,
                           stop, bit_chk);
2713
2714                 if (stop == 1) {
2715                         break;
2716                 } else {
2717                         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2718                                 if (bit_chk & 1) {
2719                                         /*
2720                                          * Remember a passing test as the
2721                                          * left_edge.
2722                                          */
2723                                         left_edge[i] = d;
2724                                 } else {
2725                                         /*
2726                                          * If a left edge has not been seen
2727                                          * yet, then a future passing test will
2728                                          * mark this edge as the right edge.
2729                                          */
2730                                         if (left_edge[i] ==
2731                                                 IO_IO_OUT1_DELAY_MAX + 1) {
2732                                                 right_edge[i] = -(d + 1);
2733                                         }
2734                                 }
                                debug_cond(DLEVEL == 2, "write_center(l,d=%d):", d);
2736                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2737                                            (int)(bit_chk & 1), i, left_edge[i]);
2738                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2739                                        right_edge[i]);
2740                                 bit_chk = bit_chk >> 1;
2741                         }
2742                 }
2743         }
2744
2745         /* Reset DQ delay chains to 0 */
2746         scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, 0);
2747         sticky_bit_chk = 0;
2748         for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) {
                debug_cond(DLEVEL == 2,
                           "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d\n",
                           __func__, __LINE__, i, left_edge[i], i,
                           right_edge[i]);
2752
                /*
                 * Check for cases where we haven't found the left edge,
                 * which makes our assignment of the right edge invalid.
                 * Reset it to the illegal value.
                 */
2758                 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) &&
2759                     (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) {
2760                         right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
                        debug_cond(DLEVEL == 2,
                                   "%s:%d write_center: reset right_edge[%u]: %d\n",
                                   __func__, __LINE__, i, right_edge[i]);
2764                 }
2765
2766                 /*
2767                  * Reset sticky bit (except for bits where we have
2768                  * seen the left edge).
2769                  */
2770                 sticky_bit_chk = sticky_bit_chk << 1;
2771                 if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1))
2772                         sticky_bit_chk = sticky_bit_chk | 1;
2773
2774                 if (i == 0)
2775                         break;
2776         }
2777
2778         /* Search for the right edge of the window for each bit */
2779         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) {
2780                 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
2781                                                         d + start_dqs);
2782
2783                 writel(0, &sdr_scc_mgr->update);
2784
                /*
                 * Stop searching when the write test doesn't pass AND when
                 * we've seen a passing write on every bit.
                 */
2789                 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
2790                         0, PASS_ONE_BIT, &bit_chk, 0);
2791
2792                 sticky_bit_chk = sticky_bit_chk | bit_chk;
2793                 stop = stop && (sticky_bit_chk == param->write_correct_mask);
2794
                debug_cond(DLEVEL == 2,
                           "write_center (right): dtap=%u => %u == %u && %u\n",
                           d, sticky_bit_chk, param->write_correct_mask,
                           stop);
2798
2799                 if (stop == 1) {
2800                         if (d == 0) {
2801                                 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS;
2802                                         i++) {
                                        /*
                                         * d = 0 failed, but it passed when
                                         * testing the left edge, so it must
                                         * be marginal; set it to -1.
                                         */
2806                                         if (right_edge[i] ==
2807                                                 IO_IO_OUT1_DELAY_MAX + 1 &&
2808                                                 left_edge[i] !=
2809                                                 IO_IO_OUT1_DELAY_MAX + 1) {
2810                                                 right_edge[i] = -1;
2811                                         }
2812                                 }
2813                         }
2814                         break;
2815                 } else {
2816                         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2817                                 if (bit_chk & 1) {
2818                                         /*
2819                                          * Remember a passing test as
2820                                          * the right_edge.
2821                                          */
2822                                         right_edge[i] = d;
2823                                 } else {
2824                                         if (d != 0) {
2825                                                 /*
2826                                                  * If a right edge has not
2827                                                  * been seen yet, then a future
2828                                                  * passing test will mark this
2829                                                  * edge as the left edge.
2830                                                  */
2831                                                 if (right_edge[i] ==
2832                                                     IO_IO_OUT1_DELAY_MAX + 1)
2833                                                         left_edge[i] = -(d + 1);
2834                                         } else {
2835                                                 /*
2836                                                  * d = 0 failed, but it passed
2837                                                  * when testing the left edge,
2838                                                  * so it must be marginal, set
2839                                                  * it to -1.
2840                                                  */
2841                                                 if (right_edge[i] ==
2842                                                     IO_IO_OUT1_DELAY_MAX + 1 &&
2843                                                     left_edge[i] !=
2844                                                     IO_IO_OUT1_DELAY_MAX + 1)
2845                                                         right_edge[i] = -1;
2846                                                 /*
2847                                                  * If a right edge has not been
2848                                                  * seen yet, then a future
2849                                                  * passing test will mark this
2850                                                  * edge as the left edge.
2851                                                  */
2852                                                 else if (right_edge[i] ==
2853                                                         IO_IO_OUT1_DELAY_MAX +
2854                                                         1)
2855                                                         left_edge[i] = -(d + 1);
2856                                         }
2857                                 }
                                debug_cond(DLEVEL == 2, "write_center(r,d=%d):", d);
2859                                 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
2860                                            (int)(bit_chk & 1), i, left_edge[i]);
2861                                 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
2862                                            right_edge[i]);
2863                                 bit_chk = bit_chk >> 1;
2864                         }
2865                 }
2866         }
2867
2868         /* Check that all bits have a window */
2869         for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
                debug_cond(DLEVEL == 2,
                           "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d\n",
                           __func__, __LINE__, i, left_edge[i], i,
                           right_edge[i]);
2873                 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) ||
2874                     (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) {
2875                         set_failing_group_stage(test_bgn + i,
2876                                                 CAL_STAGE_WRITES,
2877                                                 CAL_SUBSTAGE_WRITES_CENTER);
2878                         return 0;
2879                 }
2880         }
2881
2882         /* Find middle of window for each DQ bit */
2883         mid_min = left_edge[0] - right_edge[0];
2884         min_index = 0;
2885         for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
2886                 mid = left_edge[i] - right_edge[i];
2887                 if (mid < mid_min) {
2888                         mid_min = mid;
2889                         min_index = i;
2890                 }
2891         }
2892
2893         /*
2894          * -mid_min/2 represents the amount that we need to move DQS.
2895          * If mid_min is odd and positive we'll need to add one to
2896          * make sure the rounding in further calculations is correct
2897          * (always bias to the right), so just add 1 for all positive values.
2898          */
2899         if (mid_min > 0)
2900                 mid_min++;
2901         mid_min = mid_min / 2;
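
        /*
         * Worked example (illustrative): if the narrowest window has
         * left_edge = 7 and right_edge = 2, then mid_min = 5; adding 1
         * before the divide yields 6 / 2 = 3 rather than 2, biasing the
         * centre to the right.
         */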
2902         debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__,
2903                    __LINE__, mid_min);
2904
2905         /* Determine the amount we can change DQS (which is -mid_min) */
2906         orig_mid_min = mid_min;
2907         new_dqs = start_dqs;
2908         mid_min = 0;
        debug_cond(DLEVEL == 1,
                   "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n",
                   __func__, __LINE__, start_dqs, new_dqs, mid_min);
2911         /* Initialize data for export structures */
2912         dqs_margin = IO_IO_OUT1_DELAY_MAX + 1;
2913         dq_margin  = IO_IO_OUT1_DELAY_MAX + 1;
2914
2915         /* add delay to bring centre of all DQ windows to the same "level" */
2916         for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
2917                 /* Use values before divide by 2 to reduce round off error */
2918                 shift_dq = (left_edge[i] - right_edge[i] -
2919                         (left_edge[min_index] - right_edge[min_index]))/2  +
2920                 (orig_mid_min - mid_min);
2921
                debug_cond(DLEVEL == 2,
                           "%s:%d write_center: before: shift_dq[%u]=%d\n",
                           __func__, __LINE__, i, shift_dq);
2924
2925                 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
2926                 temp_dq_out1_delay = readl(addr + (i << 2));
2927                 if (shift_dq + (int32_t)temp_dq_out1_delay >
2928                         (int32_t)IO_IO_OUT1_DELAY_MAX) {
2929                         shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay;
2930                 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) {
2931                         shift_dq = -(int32_t)temp_dq_out1_delay;
2932                 }
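
        /*
         * Illustrative clamp (assumed values): with
         * IO_IO_OUT1_DELAY_MAX = 31, temp_dq_out1_delay = 30 and
         * shift_dq = 5, the new delay would overflow to 35, so shift_dq
         * is clamped to 31 - 30 = 1; a shift that would go below zero
         * is clamped to -temp_dq_out1_delay instead.
         */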
2933                 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n",
2934                            i, shift_dq);
2935                 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq);
2936                 scc_mgr_load_dq(i);
2937
2938                 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i,
2939                            left_edge[i] - shift_dq + (-mid_min),
2940                            right_edge[i] + shift_dq - (-mid_min));
2941                 /* To determine values for export structures */
2942                 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
2943                         dq_margin = left_edge[i] - shift_dq + (-mid_min);
2944
2945                 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
2946                         dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2947         }
2948
2949         /* Move DQS */
2950         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
2951         writel(0, &sdr_scc_mgr->update);
2952
2953         /* Centre DM */
2954         debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__);
2955
        /*
         * Set the left and right edge of each bit to an illegal value;
         * use (IO_IO_OUT1_DELAY_MAX + 1) as the illegal value.
         */
2960         left_edge[0]  = IO_IO_OUT1_DELAY_MAX + 1;
2961         right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1;
2962         int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2963         int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2964         int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1;
2965         int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1;
2966         int32_t win_best = 0;
2967
        /* Search for the window (or the part of it) reachable with DM delay shifts */
2969         for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) {
2970                 scc_mgr_apply_group_dm_out1_delay(write_group, d);
2971                 writel(0, &sdr_scc_mgr->update);
2972
2973                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
2974                                                     PASS_ALL_BITS, &bit_chk,
2975                                                     0)) {
                        /* Set the current end of the window */
2977                         end_curr = -d;
                        /*
                         * If a starting edge of our window has not been
                         * seen, this is the current start of the DM window.
                         */
2982                         if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
2983                                 bgn_curr = -d;
2984
                        /*
                         * If the current window is bigger than the best
                         * seen so far, record it as the best window.
                         */
2989                         if ((end_curr-bgn_curr+1) > win_best) {
2990                                 win_best = end_curr-bgn_curr+1;
2991                                 bgn_best = bgn_curr;
2992                                 end_best = end_curr;
2993                         }
2994                 } else {
2995                         /* We just saw a failing test. Reset temp edge */
2996                         bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2997                         end_curr = IO_IO_OUT1_DELAY_MAX + 1;
                }
        }

3002         /* Reset DM delay chains to 0 */
3003         scc_mgr_apply_group_dm_out1_delay(write_group, 0);
3004
        /*
         * Check to see if the current window nudges up against 0 delay.
         * If so, we need to continue the search by shifting DQS;
         * otherwise the DQS search begins as a new search.
         */
3009         if (end_curr != 0) {
3010                 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3011                 end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3012         }
3013
        /* Search for the window (or the part of it) reachable with DQS delay shifts */
3015         for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) {
                /*
                 * Note: This only shifts DQS, so we may be unnecessarily
                 * limiting ourselves to the width of DQ here.
                 */
3020                 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
3021                                                         d + new_dqs);
3022
3023                 writel(0, &sdr_scc_mgr->update);
3024                 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
3025                                                     PASS_ALL_BITS, &bit_chk,
3026                                                     0)) {
                        /* Set the current end of the window */
3028                         end_curr = d;
                        /*
                         * If a beginning edge of our window has not been
                         * seen, this is the current beginning of the DM
                         * window.
                         */
3033                         if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
3034                                 bgn_curr = d;
3035
                        /*
                         * If the current window is bigger than the best
                         * seen so far, record it as the best window.
                         */
3040                         if ((end_curr-bgn_curr+1) > win_best) {
3041                                 win_best = end_curr-bgn_curr+1;
3042                                 bgn_best = bgn_curr;
3043                                 end_best = end_curr;
3044                         }
3045                 } else {
3046                         /* We just saw a failing test. Reset temp edge */
3047                         bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3048                         end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3049
                        /*
                         * Early exit optimization: if the remaining delay
                         * chain space is less than the largest window
                         * already seen, we can exit.
                         */
                        if ((win_best - 1) >
                            (IO_IO_OUT1_DELAY_MAX - new_dqs - d))
                                break;
                }
        }
3059
        /* Assign left and right edge for cal and reporting */
        left_edge[0] = -1 * bgn_best;
3062         right_edge[0] = end_best;
3063
3064         debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__,
3065                    __LINE__, left_edge[0], right_edge[0]);
3066
3067         /* Move DQS (back to orig) */
3068         scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3069
3070         /* Move DM */
3071
3072         /* Find middle of window for the DM bit */
3073         mid = (left_edge[0] - right_edge[0]) / 2;
3074
3075         /* only move right, since we are not moving DQS/DQ */
3076         if (mid < 0)
3077                 mid = 0;
3078
        /* dm_margin should fail if we never find a window */
3080         if (win_best == 0)
3081                 dm_margin = -1;
3082         else
3083                 dm_margin = left_edge[0] - mid;
3084
3085         scc_mgr_apply_group_dm_out1_delay(write_group, mid);
3086         writel(0, &sdr_scc_mgr->update);
3087
        debug_cond(DLEVEL == 2,
                   "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
                   __func__, __LINE__, left_edge[0], right_edge[0], mid,
                   dm_margin);
3091         /* Export values */
3092         gbl->fom_out += dq_margin + dqs_margin;
3093
        debug_cond(DLEVEL == 2,
                   "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
                   __func__, __LINE__, dq_margin, dqs_margin, dm_margin);
3097
3098         /*
3099          * Do not remove this line as it makes sure all of our
3100          * decisions have been applied.
3101          */
3102         writel(0, &sdr_scc_mgr->update);
3103         return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0);
3104 }
3105
3106 /* calibrate the write operations */
3107 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g,
3108         uint32_t test_bgn)
3109 {
3110         /* update info for sims */
3111         debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn);
3112
3113         reg_file_set_stage(CAL_STAGE_WRITES);
3114         reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3115
3116         reg_file_set_group(g);
3117
3118         if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) {
3119                 set_failing_group_stage(g, CAL_STAGE_WRITES,
3120                                         CAL_SUBSTAGE_WRITES_CENTER);
3121                 return 0;
3122         }
3123
3124         return 1;
3125 }
3126
3127 /* precharge all banks and activate row 0 in bank "000..." and bank "111..." */
3128 static void mem_precharge_and_activate(void)
3129 {
3130         uint32_t r;
3131
3132         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
3133                 if (param->skip_ranks[r]) {
3134                         /* request to skip the rank */
3135                         continue;
3136                 }
3137
3138                 /* set rank */
3139                 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
3140
3141                 /* precharge all banks ... */
3142                 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3143                                              RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3144
3145                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3146                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1,
3147                         &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3148
3149                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3150                 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2,
3151                         &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3152
3153                 /* activate rows */
3154                 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3155                                                 RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3156         }
3157 }
3158
3159 /* Configure various memory related parameters. */
3160 static void mem_config(void)
3161 {
3162         uint32_t rlat, wlat;
3163         uint32_t rw_wl_nop_cycles;
3164         uint32_t max_latency;
3165
3166         debug("%s:%d\n", __func__, __LINE__);
3167         /* read in write and read latency */
3168         wlat = readl(&data_mgr->t_wl_add);
3169         wlat += readl(&data_mgr->mem_t_add);
3170
3171         /* WL for hard phy does not include additive latency */
3172
        /*
         * Add additional write latency to offset the address/command extra
         * clock cycle. We change the AC mux setting, causing AC to be
         * delayed by one mem clock cycle. Only do this for DDR3.
         */
3178         wlat = wlat + 1;
3179
3180         rlat = readl(&data_mgr->t_rl_add);
3181
3182         rw_wl_nop_cycles = wlat - 2;
3183         gbl->rw_wl_nop_cycles = rw_wl_nop_cycles;
3184
3185         /*
3186          * For AV/CV, lfifo is hardened and always runs at full rate so
3187          * max latency in AFI clocks, used here, is correspondingly smaller.
3188          */
        max_latency = (1 << MAX_LATENCY_COUNT_WIDTH) - 1;
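
        /*
         * E.g. (assumed width): with MAX_LATENCY_COUNT_WIDTH = 5 this
         * gives max_latency = (1 << 5) - 1 = 31 AFI clocks.
         */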
        /* Configure for a burst length of 8 */

        /* Adjust write latency for the hard PHY */
        wlat = wlat + 1;
3195
3196         /* set a pretty high read latency initially */
3197         gbl->curr_read_lat = rlat + 16;
3198
3199         if (gbl->curr_read_lat > max_latency)
3200                 gbl->curr_read_lat = max_latency;
3201
3202         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3203
3204         /* advertise write latency */
3205         gbl->curr_write_lat = wlat;
3206         writel(wlat - 2, &phy_mgr_cfg->afi_wlat);
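
        /*
         * Worked example (illustrative): with t_wl_add = 5 and
         * mem_t_add = 0, wlat is 6 after the AC mux adjustment, so
         * rw_wl_nop_cycles = 4; the hard-PHY adjustment then makes
         * wlat = 7, and afi_wlat is programmed as 7 - 2 = 5.
         */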
3207
3208         /* initialize bit slips */
3209         mem_precharge_and_activate();
3210 }
3211
3212 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */
3213 static void mem_skip_calibrate(void)
3214 {
3215         uint32_t vfifo_offset;
3216         uint32_t i, j, r;
3217
3218         debug("%s:%d\n", __func__, __LINE__);
3219         /* Need to update every shadow register set used by the interface */
3220         for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
3221                 r += NUM_RANKS_PER_SHADOW_REG) {
3222                 /*
3223                  * Set output phase alignment settings appropriate for
3224                  * skip calibration.
3225                  */
3226                 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3227                         scc_mgr_set_dqs_en_phase(i, 0);
3228 #if IO_DLL_CHAIN_LENGTH == 6
3229                         scc_mgr_set_dqdqs_output_phase(i, 6);
3230 #else
3231                         scc_mgr_set_dqdqs_output_phase(i, 7);
3232 #endif
3233                         /*
3234                          * Case:33398
3235                          *
3236                          * Write data arrives to the I/O two cycles before write
3237                          * latency is reached (720 deg).
3238                          *   -> due to bit-slip in a/c bus
3239                          *   -> to allow board skew where dqs is longer than ck
3240                          *      -> how often can this happen!?
3241                          *      -> can claim back some ptaps for high freq
3242                          *       support if we can relax this, but i digress...
3243                          *
3244                          * The write_clk leads mem_ck by 90 deg
3245                          * The minimum ptap of the OPA is 180 deg
                         * Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of delay
3247                          * The write_clk is always delayed by 2 ptaps
3248                          *
3249                          * Hence, to make DQS aligned to CK, we need to delay
3250                          * DQS by:
3251                          *    (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH))
3252                          *
3253                          * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH)
                         * gives us the number of ptaps, which simplifies to:
3255                          *
3256                          *    (1.25 * IO_DLL_CHAIN_LENGTH - 2)
3257                          */
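                        /*
                         * Worked example (illustrative): with
                         * IO_DLL_CHAIN_LENGTH = 8 each ptap is
                         * 360 / 8 = 45 deg, so DQS must be delayed by
                         * 720 - 90 - 180 - 2 * 45 = 360 deg, i.e.
                         * 360 / 45 = 8 ptaps = 1.25 * 8 - 2. The value
                         * set below supersedes the #if setting above.
                         */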
3258                         scc_mgr_set_dqdqs_output_phase(i, (1.25 *
3259                                 IO_DLL_CHAIN_LENGTH - 2));
3260                 }
3261                 writel(0xff, &sdr_scc_mgr->dqs_ena);
3262                 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3263
3264                 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
3265                         writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3266                                   SCC_MGR_GROUP_COUNTER_OFFSET);
3267                 }
3268                 writel(0xff, &sdr_scc_mgr->dq_ena);
3269                 writel(0xff, &sdr_scc_mgr->dm_ena);
3270                 writel(0, &sdr_scc_mgr->update);
3271         }
3272
3273         /* Compensate for simulation model behaviour */
3274         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3275                 scc_mgr_set_dqs_bus_in_delay(i, 10);
3276                 scc_mgr_load_dqs(i);
3277         }
3278         writel(0, &sdr_scc_mgr->update);
3279
        /*
         * ArriaV has hard FIFOs that can only be initialized by
         * incrementing them in the sequencer.
         */
3284         vfifo_offset = CALIB_VFIFO_OFFSET;
3285         for (j = 0; j < vfifo_offset; j++) {
3286                 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3287         }
3288         writel(0, &phy_mgr_cmd->fifo_reset);
3289
        /*
         * For ACV with hard LFIFO, we get the skip-cal setting from a
         * generation-time constant.
         */
3294         gbl->curr_read_lat = CALIB_LFIFO_OFFSET;
3295         writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
3296 }
3297
3298 /* Memory calibration entry point */
3299 static uint32_t mem_calibrate(void)
3300 {
3301         uint32_t i;
3302         uint32_t rank_bgn, sr;
3303         uint32_t write_group, write_test_bgn;
3304         uint32_t read_group, read_test_bgn;
3305         uint32_t run_groups, current_run;
3306         uint32_t failing_groups = 0;
3307         uint32_t group_failed = 0;
3308         uint32_t sr_failed = 0;
3309
3310         debug("%s:%d\n", __func__, __LINE__);
3311         /* Initialize the data settings */
3312
3313         gbl->error_substage = CAL_SUBSTAGE_NIL;
3314         gbl->error_stage = CAL_STAGE_NIL;
3315         gbl->error_group = 0xff;
3316         gbl->fom_in = 0;
3317         gbl->fom_out = 0;
3318
3319         mem_config();
3320
3321         for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
3322                 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3323                           SCC_MGR_GROUP_COUNTER_OFFSET);
3324                 scc_set_bypass_mode(i);
3325         }
3326
3327         if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3328                 /*
3329                  * Set VFIFO and LFIFO to instant-on settings in skip
3330                  * calibration mode.
3331                  */
3332                 mem_skip_calibrate();
3333         } else {
3334                 for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3335                         /*
3336                          * Zero all delay chain/phase settings for all
3337                          * groups and all shadow register sets.
3338                          */
3339                         scc_mgr_zero_all();
3340
3341                         run_groups = ~param->skip_groups;
3342
                        for (write_group = 0, write_test_bgn = 0;
                             write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
                             write_group++,
                             write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) {
                                /* Initialize the group failure flag */
3347                                 group_failed = 0;
3348
3349                                 current_run = run_groups & ((1 <<
3350                                         RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3351                                 run_groups = run_groups >>
3352                                         RW_MGR_NUM_DQS_PER_WRITE_GROUP;
3353
3354                                 if (current_run == 0)
3355                                         continue;
3356
3357                                 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3358                                                     SCC_MGR_GROUP_COUNTER_OFFSET);
3359                                 scc_mgr_zero_group(write_group, write_test_bgn,
3360                                                    0);
3361
                                for (read_group = write_group *
                                     RW_MGR_MEM_IF_READ_DQS_WIDTH /
                                     RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
                                     read_test_bgn = 0;
                                     read_group < (write_group + 1) *
                                     RW_MGR_MEM_IF_READ_DQS_WIDTH /
                                     RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
                                     group_failed == 0;
                                     read_group++, read_test_bgn +=
                                     RW_MGR_MEM_DQ_PER_READ_DQS) {
3372                                         /* Calibrate the VFIFO */
                                        if (!((STATIC_CALIB_STEPS) &
                                              CALIB_SKIP_VFIFO) &&
                                            !rw_mgr_mem_calibrate_vfifo(
                                                read_group, read_test_bgn)) {
                                                group_failed = 1;

                                                if (!(gbl->phy_debug_mode_flags &
                                                      PHY_DEBUG_SWEEP_ALL_GROUPS))
                                                        return 0;
                                        }
3387                                 }
3388
3389                                 /* Calibrate the output side */
                                if (group_failed == 0) {
                                        for (rank_bgn = 0, sr = 0;
                                             rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
                                             rank_bgn += NUM_RANKS_PER_SHADOW_REG,
                                             ++sr) {
                                                sr_failed = 0;

                                                /*
                                                 * Sweep the write delays only
                                                 * if writes and delay sweeps
                                                 * are enabled (not needed in
                                                 * quick mode!) and this set
                                                 * of ranks is not skipped
                                                 * entirely.
                                                 */
                                                if (!((STATIC_CALIB_STEPS) &
                                                      CALIB_SKIP_WRITES) &&
                                                    !((STATIC_CALIB_STEPS) &
                                                      CALIB_SKIP_DELAY_SWEEPS) &&
                                                    !param->skip_shadow_regs[sr] &&
                                                    !rw_mgr_mem_calibrate_writes(
                                                        rank_bgn, write_group,
                                                        write_test_bgn)) {
                                                        sr_failed = 1;
                                                        if (!(gbl->phy_debug_mode_flags &
                                                              PHY_DEBUG_SWEEP_ALL_GROUPS))
                                                                return 0;
                                                }

                                                if (sr_failed != 0)
                                                        group_failed = 1;
                                        }
                                }
3426
                                if (group_failed == 0) {
                                        for (read_group = write_group *
                                             RW_MGR_MEM_IF_READ_DQS_WIDTH /
                                             RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
                                             read_test_bgn = 0;
                                             read_group < (write_group + 1) *
                                             RW_MGR_MEM_IF_READ_DQS_WIDTH /
                                             RW_MGR_MEM_IF_WRITE_DQS_WIDTH &&
                                             group_failed == 0;
                                             read_group++, read_test_bgn +=
                                             RW_MGR_MEM_DQ_PER_READ_DQS) {
                                                if (!((STATIC_CALIB_STEPS) &
                                                      CALIB_SKIP_WRITES) &&
                                                    !rw_mgr_mem_calibrate_vfifo_end(
                                                        read_group,
                                                        read_test_bgn)) {
                                                        group_failed = 1;

                                                        if (!(gbl->phy_debug_mode_flags &
                                                              PHY_DEBUG_SWEEP_ALL_GROUPS))
                                                                return 0;
                                                }
                                        }
                                }
3452
3453                                 if (group_failed != 0)
3454                                         failing_groups++;
3455                         }
3456
                        /*
                         * If there are any failing groups, then report
                         * the failure.
                         */
3461                         if (failing_groups != 0)
3462                                 return 0;
3463
3464                         /* Calibrate the LFIFO */
3465                         if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) {
3466                                 /*
3467                                  * If we're skipping groups as part of debug,
3468                                  * don't calibrate LFIFO.
3469                                  */
3470                                 if (param->skip_groups == 0) {
3471                                         if (!rw_mgr_mem_calibrate_lfifo())
3472                                                 return 0;
3473                                 }
3474                         }
3475                 }
3476         }
3477
3478         /*
3479          * Do not remove this line as it makes sure all of our decisions
3480          * have been applied.
3481          */
3482         writel(0, &sdr_scc_mgr->update);
3483         return 1;
3484 }
3485
3486 static uint32_t run_mem_calibrate(void)
3487 {
        uint32_t pass;
        uint32_t debug_info;
        uint32_t ctrlcfg;
3490
3491         debug("%s:%d\n", __func__, __LINE__);
3492
3493         /* Reset pass/fail status shown on afi_cal_success/fail */
3494         writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3495
        /* Stop the tracking manager */
        ctrlcfg = readl(&sdr_ctrl->ctrl_cfg);

        writel(ctrlcfg & 0xFFBFFFFF, &sdr_ctrl->ctrl_cfg);
3500
3501         initialize();
3502         rw_mgr_mem_initialize();
3503
3504         pass = mem_calibrate();
3505
3506         mem_precharge_and_activate();
3507         writel(0, &phy_mgr_cmd->fifo_reset);
3508
3509         /*
3510          * Handoff:
3511          * Don't return control of the PHY back to AFI when in debug mode.
3512          */
3513         if ((gbl->phy_debug_mode_flags & PHY_DEBUG_IN_DEBUG_MODE) == 0) {
3514                 rw_mgr_mem_handoff();
3515                 /*
3516                  * In Hard PHY this is a 2-bit control:
3517                  * 0: AFI Mux Select
3518                  * 1: DDIO Mux Select
3519                  */
3520                 writel(0x2, &phy_mgr_cfg->mux_sel);
3521         }
3522
3523         writel(ctrlcfg, &sdr_ctrl->ctrl_cfg);
3524
3525         if (pass) {
3526                 printf("%s: CALIBRATION PASSED\n", __FILE__);
3527
3528                 gbl->fom_in /= 2;
3529                 gbl->fom_out /= 2;
3530
3531                 if (gbl->fom_in > 0xff)
3532                         gbl->fom_in = 0xff;
3533
3534                 if (gbl->fom_out > 0xff)
3535                         gbl->fom_out = 0xff;
3536
3537                 /* Update the FOM in the register file */
3538                 debug_info = gbl->fom_in;
3539                 debug_info |= gbl->fom_out << 8;
3540                 writel(debug_info, &sdr_reg_file->fom);
3541
3542                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3543                 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3544         } else {
3545                 printf("%s: CALIBRATION FAILED\n", __FILE__);
3546
                /* Update the failing group/stage in the register file */
                debug_info = gbl->error_stage;
                debug_info |= gbl->error_substage << 8;
                debug_info |= gbl->error_group << 16;

                writel(debug_info, &sdr_reg_file->failing_stage);
                writel(debug_info, &phy_mgr_cfg->cal_debug_info);
                writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3560         }
3561
3562         return pass;
3563 }
3564
3565 /**
3566  * hc_initialize_rom_data() - Initialize ROM data
3567  *
3568  * Initialize ROM data.
3569  */
3570 static void hc_initialize_rom_data(void)
3571 {
3572         u32 i, addr;
3573
3574         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3575         for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++)
3576                 writel(inst_rom_init[i], addr + (i << 2));
3577
3578         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3579         for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++)
3580                 writel(ac_rom_init[i], addr + (i << 2));
3581 }
3582
3583 /**
3584  * initialize_reg_file() - Initialize SDR register file
3585  *
3586  * Initialize SDR register file.
3587  */
3588 static void initialize_reg_file(void)
3589 {
3590         /* Initialize the register file with the correct data */
3591         writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature);
3592         writel(0, &sdr_reg_file->debug_data_addr);
3593         writel(0, &sdr_reg_file->cur_stage);
3594         writel(0, &sdr_reg_file->fom);
3595         writel(0, &sdr_reg_file->failing_stage);
3596         writel(0, &sdr_reg_file->debug1);
3597         writel(0, &sdr_reg_file->debug2);
3598 }
3599
3600 /**
3601  * initialize_hps_phy() - Initialize HPS PHY
3602  *
3603  * Initialize HPS PHY.
3604  */
3605 static void initialize_hps_phy(void)
3606 {
3607         uint32_t reg;
3608         /*
3609          * Tracking also gets configured here because it's in the
3610          * same register.
3611          */
3612         uint32_t trk_sample_count = 7500;
        /*
         * Format is number of outer loops in the 16 MSB, sample
         * count in 16 LSB.
         */
        uint32_t trk_long_idle_sample_count = (10 << 16) | 100;
3618
3619         reg = 0;
3620         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3621         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3622         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3623         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3624         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3625         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3626         /*
3627          * This field selects the intrinsic latency to RDATA_EN/FULL path.
3628          * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3629          */
3630         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3631         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3632                 trk_sample_count);
3633         writel(reg, &sdr_ctrl->phy_ctrl0);
3634
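        /*
         * trk_sample_count = 7500 fits in the low 20 bits, so the
         * SAMPLECOUNT_31_20 field written below is zero for this value.
         */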
3635         reg = 0;
3636         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3637                 trk_sample_count >>
3638                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3639         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3640                 trk_long_idle_sample_count);
3641         writel(reg, &sdr_ctrl->phy_ctrl1);
3642
3643         reg = 0;
3644         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3645                 trk_long_idle_sample_count >>
3646                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3647         writel(reg, &sdr_ctrl->phy_ctrl2);
3648 }
3649
3650 static void initialize_tracking(void)
3651 {
3652         uint32_t concatenated_longidle = 0x0;
3653         uint32_t concatenated_delays = 0x0;
3654         uint32_t concatenated_rw_addr = 0x0;
3655         uint32_t concatenated_refresh = 0x0;
3656         uint32_t trk_sample_count = 7500;
3657         uint32_t dtaps_per_ptap;
3658         uint32_t tmp_delay;
3659
        /*
         * Compute a usable version of the value in case we skip the
         * full computation later.
         */
3664         dtaps_per_ptap = 0;
3665         tmp_delay = 0;
3666         while (tmp_delay < IO_DELAY_PER_OPA_TAP) {
3667                 dtaps_per_ptap++;
3668                 tmp_delay += IO_DELAY_PER_DCHAIN_TAP;
3669         }
3670         dtaps_per_ptap--;
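
        /*
         * The loop above yields the largest number of dtaps that fits
         * strictly inside one ptap. Illustrative (assumed) numbers:
         * with IO_DELAY_PER_OPA_TAP = 416 and
         * IO_DELAY_PER_DCHAIN_TAP = 25, the loop exits with
         * dtaps_per_ptap = 17 and the decrement leaves 16.
         */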
3671
        /* longidle outer loop */
        concatenated_longidle = concatenated_longidle ^ 10;
        concatenated_longidle = concatenated_longidle << 16;
        /* longidle sample count */
        concatenated_longidle = concatenated_longidle ^ 100;

        /* trfc, worst case of 933MHz 4Gb */
        concatenated_delays = concatenated_delays ^ 243;
        concatenated_delays = concatenated_delays << 8;
        /* trcd, worst case */
        concatenated_delays = concatenated_delays ^ 14;
        concatenated_delays = concatenated_delays << 8;
        /* vfifo wait */
        concatenated_delays = concatenated_delays ^ 10;
        concatenated_delays = concatenated_delays << 8;
        /* mux delay */
        concatenated_delays = concatenated_delays ^ 4;
3688
3689         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_IDLE;
3690         concatenated_rw_addr = concatenated_rw_addr << 8;
3691         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_ACTIVATE_1;
3692         concatenated_rw_addr = concatenated_rw_addr << 8;
3693         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_SGLE_READ;
3694         concatenated_rw_addr = concatenated_rw_addr << 8;
3695         concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_PRECHARGE_ALL;
3696
3697         concatenated_refresh = concatenated_refresh ^ RW_MGR_REFRESH_ALL;
3698         concatenated_refresh = concatenated_refresh << 24;
3699         concatenated_refresh = concatenated_refresh ^ 1000; /* trefi */
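
        /*
         * Resulting register layouts (the XORs above act as ORs into
         * zeroed fields, assuming each opcode fits its byte):
         *   concatenated_longidle = (10 << 16) | 100
         *   concatenated_delays   = (243 << 24) | (14 << 16) |
         *                           (10 << 8) | 4
         *   concatenated_rw_addr  = (RW_MGR_IDLE << 24) |
         *                           (RW_MGR_ACTIVATE_1 << 16) |
         *                           (RW_MGR_SGLE_READ << 8) |
         *                           RW_MGR_PRECHARGE_ALL
         *   concatenated_refresh  = (RW_MGR_REFRESH_ALL << 24) | 1000
         */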
3700
3701         /* Initialize the register file with the correct data */
3702         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
3703         writel(trk_sample_count, &sdr_reg_file->trk_sample_count);
3704         writel(concatenated_longidle, &sdr_reg_file->trk_longidle);
3705         writel(concatenated_delays, &sdr_reg_file->delays);
3706         writel(concatenated_rw_addr, &sdr_reg_file->trk_rw_mgr_addr);
3707         writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, &sdr_reg_file->trk_read_dqs_width);
3708         writel(concatenated_refresh, &sdr_reg_file->trk_rfsh);
3709 }
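
/*
 * The register-file values written above appear to parameterize the
 * post-calibration DQS tracking logic: sample counts, the RW manager
 * command addresses it replays, and per-command timing budgets. This is
 * inferred from the trk_* register names rather than from documentation.
 */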
3710
3711 int sdram_calibration_full(void)
3712 {
3713         struct param_type my_param;
3714         struct gbl_type my_gbl;
3715         uint32_t pass;
3716         uint32_t i;
3717
3718         param = &my_param;
3719         gbl = &my_gbl;
3720
3721         /* Initialize the debug mode flags */
3722         gbl->phy_debug_mode_flags = 0;
3723         /* Enable the calibration report by default */
3724         gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3725         /*
3726          * By default, sweep all groups regardless of fail state and
3727          * leave the guaranteed read test enabled.
3728          */
3729 #if DISABLE_GUARANTEED_READ
3730         gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3731 #endif
3732         /* Initialize the register file */
3733         initialize_reg_file();
3734
3735         /* Initialize any PHY CSR */
3736         initialize_hps_phy();
3737
3738         scc_mgr_initialize();
3739
3740         initialize_tracking();
3741
3742         /* Enable all ranks, groups */
3743         for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++)
3744                 param->skip_ranks[i] = 0;
3745         for (i = 0; i < NUM_SHADOW_REGS; ++i)
3746                 param->skip_shadow_regs[i] = 0;
3747         param->skip_groups = 0;
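        /*
         * Zeroed skip masks mean every rank, shadow register and group
         * takes part in calibration; a non-zero entry would exclude the
         * corresponding resource.
         */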
3748
3749         printf("%s: Preparing to start memory calibration\n", __FILE__);
3750
3751         debug("%s:%d\n", __func__, __LINE__);
3752         debug_cond(DLEVEL == 1,
3753                    "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3754                    RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM,
3755                    RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS,
3756                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS,
3757                    RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
3758         debug_cond(DLEVEL == 1,
3759                    "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3760                    RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH,
3761                    RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH,
3762                    IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP);
3763         debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u ",
3764                    IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH);
3765         debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3766                    IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX,
3767                    IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX);
3768         debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3769                    IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX,
3770                    IO_IO_OUT2_DELAY_MAX);
3771         debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3772                    IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE);
3773
3774         hc_initialize_rom_data();
3775
3776         /* update info for sims */
3777         reg_file_set_stage(CAL_STAGE_NIL);
3778         reg_file_set_group(0);
3779
3780         /*
3781          * Load the global needed by actions that require dynamic
3782          * calibration support.
3783          */
3784         dyn_calib_steps = STATIC_CALIB_STEPS;
3785         /*
3786          * Load the global that selects the delay-loop settings based
3787          * on the calibration mode.
3788          */
3789         if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3790                 skip_delay_mask = 0xff;
3791         else
3792                 skip_delay_mask = 0x0;
3793
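        /*
         * Downstream, delay-loop counts are ANDed with skip_delay_mask, so
         * a count passes through unchanged in normal mode (0xff) and
         * collapses to zero iterations in fast-sim skip mode (0x0). A
         * minimal sketch with a hypothetical count value:
         *
         * uint32_t init_delay = some_init_cntr_val & skip_delay_mask;
         */
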
3794         pass = run_mem_calibrate();
3795
3796         printf("%s: Calibration complete\n", __FILE__);
3797         return pass;
3798 }
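
/*
 * A minimal caller sketch, assuming an SPL-style hook; the wrapper name
 * below is hypothetical and the real call site lives in the SoCFPGA
 * platform code. A zero return from sdram_calibration_full() indicates
 * that calibration failed.
 *
 * void board_sdram_init(void)
 * {
 *         if (!sdram_calibration_full())
 *                 hang();
 * }
 */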