From cd8aefc076ebac3e01499a55fd0830144e854985 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Thu, 2 Jul 2015 11:29:00 +0530 Subject: [PATCH] drivers: fsl-mc: Update qbman driver Update qbman driver - As per latest available qbman driver - Use of atomic APIs Signed-off-by: Prabhakar Kushwaha CC: Geoff Thorpe CC: Haiying Wang CC: Roy Pledge Reviewed-by: York Sun --- drivers/net/fsl-mc/dpio/qbman_portal.c | 66 ++++++++++++++----------- drivers/net/fsl-mc/dpio/qbman_portal.h | 22 ++++++--- drivers/net/fsl-mc/dpio/qbman_private.h | 2 +- 3 files changed, 53 insertions(+), 37 deletions(-) diff --git a/drivers/net/fsl-mc/dpio/qbman_portal.c b/drivers/net/fsl-mc/dpio/qbman_portal.c index dd2a7deee5..5fa8d953e5 100644 --- a/drivers/net/fsl-mc/dpio/qbman_portal.c +++ b/drivers/net/fsl-mc/dpio/qbman_portal.c @@ -64,7 +64,7 @@ enum qbman_sdqcr_fc { struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) { int ret; - struct qbman_swp *p = kmalloc(sizeof(*p), GFP_KERNEL); + struct qbman_swp *p = malloc(sizeof(struct qbman_swp)); if (!p) return NULL; @@ -77,7 +77,7 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) qb_attr_code_encode(&code_sdqcr_dct, &p->sdq, qbman_sdqcr_dct_prio_ics); qb_attr_code_encode(&code_sdqcr_fc, &p->sdq, qbman_sdqcr_fc_up_to_3); qb_attr_code_encode(&code_sdqcr_tok, &p->sdq, 0xbb); - p->vdq.busy = 0; /* TODO: convert to atomic_t */ + atomic_set(&p->vdq.busy, 1); p->vdq.valid_bit = QB_VALID_BIT; p->dqrr.next_idx = 0; p->dqrr.valid_bit = QB_VALID_BIT; @@ -165,7 +165,6 @@ static struct qb_attr_code code_eq_qd_bin = QB_CODE(4, 0, 16); static struct qb_attr_code code_eq_qd_pri = QB_CODE(4, 16, 4); static struct qb_attr_code code_eq_rsp_stash = QB_CODE(5, 16, 1); static struct qb_attr_code code_eq_rsp_lo = QB_CODE(6, 0, 32); -static struct qb_attr_code code_eq_rsp_hi = QB_CODE(7, 0, 32); enum qbman_eq_cmd_e { /* No enqueue, primarily for plugging ORP gaps for dropped frames */ @@ -197,8 +196,7 @@ void 
qbman_eq_desc_set_response(struct qbman_eq_desc *d, { uint32_t *cl = qb_cl(d); - qb_attr_code_encode(&code_eq_rsp_lo, cl, lower32(storage_phys)); - qb_attr_code_encode(&code_eq_rsp_hi, cl, upper32(storage_phys)); + qb_attr_code_encode_64(&code_eq_rsp_lo, (uint64_t *)cl, storage_phys); qb_attr_code_encode(&code_eq_rsp_stash, cl, !!stash); } @@ -253,7 +251,6 @@ static struct qb_attr_code code_pull_numframes = QB_CODE(0, 8, 4); static struct qb_attr_code code_pull_token = QB_CODE(0, 16, 8); static struct qb_attr_code code_pull_dqsource = QB_CODE(1, 0, 24); static struct qb_attr_code code_pull_rsp_lo = QB_CODE(2, 0, 32); -static struct qb_attr_code code_pull_rsp_hi = QB_CODE(3, 0, 32); enum qb_pull_dt_e { qb_pull_dt_channel, @@ -282,8 +279,7 @@ void qbman_pull_desc_set_storage(struct qbman_pull_desc *d, } qb_attr_code_encode(&code_pull_rls, cl, 1); qb_attr_code_encode(&code_pull_stash, cl, !!stash); - qb_attr_code_encode(&code_pull_rsp_lo, cl, lower32(storage_phys)); - qb_attr_code_encode(&code_pull_rsp_hi, cl, upper32(storage_phys)); + qb_attr_code_encode_64(&code_pull_rsp_lo, (uint64_t *)cl, storage_phys); } void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, uint8_t numframes) @@ -316,10 +312,10 @@ int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) uint32_t *p; uint32_t *cl = qb_cl(d); - /* TODO: convert to atomic_t */ - if (s->vdq.busy) + if (!atomic_dec_and_test(&s->vdq.busy)) { + atomic_inc(&s->vdq.busy); return -EBUSY; - s->vdq.busy = 1; + } s->vdq.storage = *(void **)&cl[4]; s->vdq.token = qb_attr_code_decode(&code_pull_token, cl); p = qbman_cena_write_start(&s->sys, QBMAN_CENA_SWP_VDQCR); @@ -359,36 +355,44 @@ const struct ldpaa_dq *qbman_swp_dqrr_next(struct qbman_swp *s) { uint32_t verb; uint32_t response_verb; - const struct ldpaa_dq *dq = qbman_cena_read(&s->sys, - QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); - const uint32_t *p = qb_cl(dq); + uint32_t flags; + const struct ldpaa_dq *dq; + const uint32_t *p; + dq = 
qbman_cena_read(&s->sys, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); + p = qb_cl(dq); verb = qb_attr_code_decode(&code_dqrr_verb, p); - /* If the valid-bit isn't of the expected polarity, nothing there */ + + /* If the valid-bit isn't of the expected polarity, nothing there. Note, + * in the DQRR reset bug workaround, we shouldn't need to skip this + * check, because we've already determined that a new entry is available + * and we've invalidated the cacheline before reading it, so the + * valid-bit behaviour is repaired and should tell us what we already + * knew from reading PI. + */ if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) { qbman_cena_invalidate_prefetch(&s->sys, - QBMAN_CENA_SWP_DQRR( - s->dqrr.next_idx)); + QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); return NULL; } /* There's something there. Move "next_idx" attention to the next ring * entry (and prefetch it) before returning what we found. */ s->dqrr.next_idx++; - s->dqrr.next_idx &= 3; /* Wrap around at 4 */ + s->dqrr.next_idx &= QBMAN_DQRR_SIZE - 1; /* Wrap around at 4 */ /* TODO: it's possible to do all this without conditionals, optimise it * later. */ if (!s->dqrr.next_idx) s->dqrr.valid_bit ^= QB_VALID_BIT; - /* VDQCR "no longer busy" hook - if VDQCR shows "busy" and this is a - * VDQCR result, mark it as non-busy. 
*/ - if (s->vdq.busy) { - uint32_t flags = ldpaa_dq_flags(dq); - - response_verb = qb_attr_code_decode(&code_dqrr_response, &verb); - if ((response_verb == QBMAN_DQRR_RESPONSE_DQ) && - (flags & LDPAA_DQ_STAT_VOLATILE)) - s->vdq.busy = 0; - } + + /* If this is the final response to a volatile dequeue command + indicate that the vdq is no longer busy */ + flags = ldpaa_dq_flags(dq); + response_verb = qb_attr_code_decode(&code_dqrr_response, &verb); + if ((response_verb == QBMAN_DQRR_RESPONSE_DQ) && + (flags & LDPAA_DQ_STAT_VOLATILE) && + (flags & LDPAA_DQ_STAT_EXPIRED)) + atomic_inc(&s->vdq.busy); + qbman_cena_invalidate_prefetch(&s->sys, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); return dq; @@ -448,8 +452,10 @@ int qbman_dq_entry_has_newtoken(struct qbman_swp *s, * reset "busy". We instead base the decision on whether the current * result is sitting at the first 'storage' location of the busy * command. */ - if (s->vdq.busy && (s->vdq.storage == dq)) - s->vdq.busy = 0; + if (s->vdq.storage == dq) { + s->vdq.storage = NULL; + atomic_inc(&s->vdq.busy); + } return 1; } diff --git a/drivers/net/fsl-mc/dpio/qbman_portal.h b/drivers/net/fsl-mc/dpio/qbman_portal.h index bb67c3bd06..86e2c3aac4 100644 --- a/drivers/net/fsl-mc/dpio/qbman_portal.h +++ b/drivers/net/fsl-mc/dpio/qbman_portal.h @@ -14,6 +14,10 @@ /* Management command result codes */ #define QBMAN_MC_RSLT_OK 0xf0 +/* TBD: as of QBMan 4.1, DQRR will be 8 rather than 4! */ +#define QBMAN_DQRR_SIZE 4 + + /* --------------------- */ /* portal data structure */ /* --------------------- */ @@ -48,14 +52,13 @@ struct qbman_swp { * to whether or not a command can be submitted, not whether or * not a previously-submitted command is still executing. In * other words, once proof is seen that the previously-submitted - * command is executing, "vdq" is no longer "busy". TODO: - * convert this to "atomic_t" so that it is thread-safe (without - * locking). */ - int busy; + * command is executing, "vdq" is no longer "busy". 
+ */ + atomic_t busy; uint32_t valid_bit; /* 0x00 or 0x80 */ /* We need to determine when vdq is no longer busy. This depends * on whether the "busy" (last-submitted) dequeue command is - * targetting DQRR or main-memory, and detected is based on the + * targeting DQRR or main-memory, and detection is based on the * presence of the dequeue command's "token" showing up in * dequeue entries in DQRR or main-memory (respectively). Debug * builds will, when submitting vdq commands, verify that the @@ -127,6 +130,7 @@ static inline uint32_t qb_attr_code_decode(const struct qb_attr_code *code, return d32_uint32_t(code->lsoffset, code->width, cacheline[code->word]); } + /* encode a field to a cacheline */ static inline void qb_attr_code_encode(const struct qb_attr_code *code, uint32_t *cacheline, uint32_t val) @@ -136,6 +140,12 @@ static inline void qb_attr_code_encode(const struct qb_attr_code *code, | e32_uint32_t(code->lsoffset, code->width, val); } +static inline void qb_attr_code_encode_64(const struct qb_attr_code *code, + uint64_t *cacheline, uint64_t val) +{ + cacheline[code->word / 2] = val; +} + /* ---------------------- */ /* Descriptors/cachelines */ /* ---------------------- */ @@ -144,7 +154,7 @@ static inline void qb_attr_code_encode(const struct qb_attr_code *code, * a "descriptor" type that the caller can instantiate however they like. * Ultimately though, it is just a cacheline of binary storage (or something * smaller when it is known that the descriptor doesn't need all 64 bytes) for - * holding pre-formatted pieces of harware commands. The performance-critical + * holding pre-formatted pieces of hardware commands. The performance-critical * code can then copy these descriptors directly into hardware command * registers more efficiently than trying to construct/format commands * on-the-fly. 
The API user sees the descriptor as an array of 32-bit words in diff --git a/drivers/net/fsl-mc/dpio/qbman_private.h b/drivers/net/fsl-mc/dpio/qbman_private.h index 2d2556b755..f1f16b828b 100644 --- a/drivers/net/fsl-mc/dpio/qbman_private.h +++ b/drivers/net/fsl-mc/dpio/qbman_private.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include -- 2.39.5