1 // SPDX-License-Identifier: GPL-2.0+
3 * This file is part of UBIFS.
5 * Copyright (C) 2006-2008 Nokia Corporation.
7 * Authors: Artem Bityutskiy (Битюцкий Артём)
12 * This file is a part of UBIFS journal implementation and contains various
13 * functions which manipulate the log. The log is a fixed area on the flash
14 * which does not contain any data but refers to buds. The log is a part of the
19 #include <linux/err.h>
/*
 * Forward declaration: the function is called by the commit code below
 * before its definition, which appears further down in this file.
 */
23 static int dbg_check_bud_bytes(struct ubifs_info *c);
26 * ubifs_search_bud - search bud LEB.
27 * @c: UBIFS file-system description object
28 * @lnum: logical eraseblock number to search
30 * This function searches bud LEB @lnum. Returns bud description object in case
31 * of success and %NULL if there is no bud with this LEB number.
/*
 * Find the bud for LEB number lnum. Tree nodes are converted to struct
 * ubifs_bud with rb_entry() and compared by their lnum field. The walk runs
 * under c->buds_lock, which is released on two separate exit paths below.
 * NOTE(review): this listing omits several short lines of the function (the
 * tree cursor setup, the loop header, the first comparison and the return
 * statements) - confirm details against the full file.
 */
33 struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum)
36 struct ubifs_bud *bud;
/* Serialise against other users of the buds tree. */
38 spin_lock(&c->buds_lock);
/* Recover the bud that embeds the current tree node p. */
41 bud = rb_entry(p, struct ubifs_bud, rb);
/* Wanted LEB number is above this bud - presumably step to the right subtree. */
44 else if (lnum > bud->lnum)
/* First unlock: presumably the match path, just before the bud is returned. */
47 spin_unlock(&c->buds_lock);
/* Second unlock: presumably the path where no bud matched. */
51 spin_unlock(&c->buds_lock);
56 * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one.
57 * @c: UBIFS file-system description object
58 * @lnum: logical eraseblock number to search
60 * This function returns the wbuf for @lnum or %NULL if there is none.
/*
 * Look up the bud for LEB number lnum and hand back the write-buffer of a
 * journal head, i.e. the address of c->jheads[jhead].wbuf. The search walks
 * tree nodes with rb_entry() while holding c->buds_lock.
 * NOTE(review): the lines that set the local jhead (presumably from the
 * matching bud), the loop header and the not-found return are not visible in
 * this listing - confirm against the full file.
 */
62 struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
65 struct ubifs_bud *bud;
/* The buds tree is only examined with the spinlock held. */
71 spin_lock(&c->buds_lock);
/* Recover the bud that embeds the current tree node p. */
74 bud = rb_entry(p, struct ubifs_bud, rb);
/* Wanted LEB number is above this bud - presumably step to the right subtree. */
77 else if (lnum > bud->lnum)
/* Lock is dropped before the write-buffer pointer is returned. */
81 spin_unlock(&c->buds_lock);
82 return &c->jheads[jhead].wbuf;
/* Second unlock: presumably the path where no bud matched. */
85 spin_unlock(&c->buds_lock);
90 * empty_log_bytes - calculate amount of empty space in the log.
91 * @c: UBIFS file-system description object
/*
 * Compute how many bytes of the log area are still empty, based on the
 * current log head (c->lhead_lnum, c->lhead_offs) and log tail
 * (c->ltail_lnum). Only reads fields of c.
 * NOTE(review): the declarations of h and t, the first branch condition and
 * the remaining return statements are not visible in this listing.
 */
93 static inline long long empty_log_bytes(const struct ubifs_info *c)
/*
 * h: byte position of the log head (LEB number times LEB size plus offset).
 * The cast makes the multiplication happen in long long.
 */
97 h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
/* t: byte position of the start of the tail LEB. */
98 t = (long long)c->ltail_lnum * c->leb_size;
/*
 * Total log size minus head position plus tail position. Presumably used
 * when the head is numerically past the tail - the guarding condition is
 * not visible here.
 */
101 return c->log_bytes - h + t;
/* Head and tail are in different LEBs; the result for this case is not visible. */
104 else if (c->lhead_lnum != c->ltail_lnum)
111 * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
112 * @c: UBIFS file-system description object
113 * @bud: the bud to add
/*
 * Insert bud into the c->buds red-black tree (ordered by LEB number), append
 * it to the buds_list of journal head c->jheads[bud->jhead], and add the
 * space from bud->start to the end of the LEB to c->bud_bytes. Everything
 * from the tree walk to the accounting is done with c->buds_lock held.
 * NOTE(review): the loop header, the child-pointer updates and any condition
 * around the journal head list handling are not visible in this listing.
 */
115 void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
117 struct rb_node **p, *parent = NULL;
119 struct ubifs_jhead *jhead;
121 spin_lock(&c->buds_lock);
/* Start the descent at the root link of the buds tree. */
122 p = &c->buds.rb_node;
125 b = rb_entry(parent, struct ubifs_bud, rb);
/* A bud for the same LEB must not already be in the tree. */
126 ubifs_assert(bud->lnum != b->lnum);
127 if (bud->lnum < b->lnum)
/* Attach the new node at the slot found by the walk, then rebalance. */
133 rb_link_node(&bud->rb, parent, p);
134 rb_insert_color(&bud->rb, &c->buds);
/* Queue the bud at the tail of the list kept by its journal head. */
136 jhead = &c->jheads[bud->jhead];
137 list_add_tail(&bud->list, &jhead->buds_list);
/*
 * NOTE(review): the condition under which this assertion is evaluated is
 * not visible here; it requires replay on a read-only mount.
 */
139 ubifs_assert(c->replaying && c->ro_mount);
142 * Note, although this is a new bud, we anyway account this space now,
143 * before any data has been written to it, because this is about to
144 * guarantee fixed mount time, and this bud will anyway be read and
/* Account the usable part of the bud LEB: from its start offset to the end. */
147 c->bud_bytes += c->leb_size - bud->start;
149 dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
150 bud->start, dbg_jhead(bud->jhead), c->bud_bytes);
151 spin_unlock(&c->buds_lock);
155 * ubifs_add_bud_to_log - add a new bud to the log.
156 * @c: UBIFS file-system description object
157 * @jhead: journal head the bud belongs to
158 * @lnum: LEB number of the bud
159 * @offs: starting offset of the bud
161 * This function writes a reference node for the new bud LEB @lnum to the log,
162 * and adds it to the buds tree. It also makes sure that log size does not
163 * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
164 * %-EAGAIN if commit is required, and a negative error code in case of
/*
 * Record a new bud in the log. The visible steps are:
 *  - allocate a bud descriptor (kmalloc) and a zeroed reference node of
 *    c->ref_node_alsz bytes (kzalloc), both with GFP_NOFS;
 *  - take c->log_mutex and assert the media and mount are not read-only;
 *  - flag that a commit is required when the log or the bud space limit
 *    would be exceeded, and request a background commit when the journal
 *    has reached c->bg_bud_bytes while the commit state is COMMIT_RESTING;
 *  - fill in the reference node, move the log head to the next log LEB when
 *    the node would not fit, unmap a log LEB that is about to be written
 *    from offset zero, and map the bud LEB when the bud starts at offset 0;
 *  - write the reference node at the log head, advance c->lhead_offs by
 *    c->ref_node_alsz, register the bud with ubifs_add_bud() and drop the
 *    mutex (the two mutex_unlock calls at the end are presumably the success
 *    path and the error path).
 * NOTE(review): allocation-failure handling, the initialisation of the bud
 * fields, the error labels and all return statements are not visible in this
 * listing - confirm against the full file.
 */
167 int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
170 struct ubifs_bud *bud;
171 struct ubifs_ref_node *ref;
173 bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
176 ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
182 mutex_lock(&c->log_mutex);
183 ubifs_assert(!c->ro_media && !c->ro_mount);
189 /* Make sure we have enough space in the log */
/* Space left after this reference node must stay at or above c->min_log_bytes. */
190 if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
191 dbg_log("not enough log space - %lld, required %d",
192 empty_log_bytes(c), c->min_log_bytes);
193 ubifs_commit_required(c);
/*
 * NOTE(review): the comment text below names c->bud_bytes as a lock; the
 * visible code updates c->bud_bytes under the c->buds_lock spinlock.
 */
199 * Make sure the amount of space in buds will not exceed the
200 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
203 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
204 * because we are holding @c->log_mutex. All @c->bud_bytes take place
205 * when both @c->log_mutex and @c->bud_bytes are locked.
/* The new bud would contribute the bytes from offs to the end of its LEB. */
207 if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
208 dbg_log("bud bytes %lld (%lld max), require commit",
209 c->bud_bytes, c->max_bud_bytes);
210 ubifs_commit_required(c);
216 * If the journal is full enough - start background commit. Note, it is
217 * OK to read 'c->cmt_state' without spinlock because integer reads
218 * are atomic in the kernel.
220 if (c->bud_bytes >= c->bg_bud_bytes &&
221 c->cmt_state == COMMIT_RESTING) {
222 dbg_log("bud bytes %lld (%lld max), initiate BG commit",
223 c->bud_bytes, c->max_bud_bytes);
224 ubifs_request_bg_commit(c);
/* Fill the reference node; the numeric fields are stored little-endian. */
231 ref->ch.node_type = UBIFS_REF_NODE;
232 ref->lnum = cpu_to_le32(bud->lnum);
233 ref->offs = cpu_to_le32(bud->start);
234 ref->jhead = cpu_to_le32(jhead);
/* Not enough room left in the current log LEB for one aligned reference node. */
236 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
237 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
/* The head must never catch up with the tail LEB. */
238 ubifs_assert(c->lhead_lnum != c->ltail_lnum);
242 if (c->lhead_offs == 0) {
243 /* Must ensure next log LEB has been unmapped */
244 err = ubifs_leb_unmap(c, c->lhead_lnum);
249 if (bud->start == 0) {
251 * Before writing the LEB reference which refers an empty LEB
252 * to the log, we have to make sure it is mapped, because
253 * otherwise we'd risk to refer an LEB with garbage in case of
254 * an unclean reboot, because the target LEB might have been
255 * unmapped, but not yet physically erased.
257 err = ubifs_leb_map(c, bud->lnum);
262 dbg_log("write ref LEB %d:%d",
263 c->lhead_lnum, c->lhead_offs);
264 err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
269 c->lhead_offs += c->ref_node_alsz;
271 ubifs_add_bud(c, bud);
273 mutex_unlock(&c->log_mutex);
278 mutex_unlock(&c->log_mutex);
285 * remove_buds - remove used buds.
286 * @c: UBIFS file-system description object
288 * This function removes used buds from the buds tree. It does not remove the
289 * buds which are pointed to by journal heads.
/*
 * Walk every bud in the c->buds tree (starting at rb_first) under
 * c->buds_lock and compute c->cmt_bud_bytes, the amount of bud space covered
 * by the commit. A bud whose LEB is the one currently held by the
 * write-buffer of its journal head is kept and only its start offset is
 * advanced; every other bud is erased from the tree and moved to the
 * c->old_buds list.
 * NOTE(review): the loop header, the step that advances p and the else
 * keyword between the two branches are not visible in this listing.
 */
291 static void remove_buds(struct ubifs_info *c)
/* The list of old buds from a previous commit must already be drained. */
295 ubifs_assert(list_empty(&c->old_buds));
296 c->cmt_bud_bytes = 0;
297 spin_lock(&c->buds_lock);
298 p = rb_first(&c->buds);
/* p1 keeps the current node; presumably p itself is advanced before any erase. */
300 struct rb_node *p1 = p;
301 struct ubifs_bud *bud;
302 struct ubifs_wbuf *wbuf;
305 bud = rb_entry(p1, struct ubifs_bud, rb);
/* Write-buffer of the journal head this bud belongs to. */
306 wbuf = &c->jheads[bud->jhead].wbuf;
308 if (wbuf->lnum == bud->lnum) {
310 * Do not remove buds which are pointed to by journal
311 * heads (non-closed buds).
/* Only the part written so far (up to the write-buffer offset) is committed. */
313 c->cmt_bud_bytes += wbuf->offs - bud->start;
314 dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
315 bud->lnum, bud->start, dbg_jhead(bud->jhead),
316 wbuf->offs - bud->start, c->cmt_bud_bytes);
/* The preserved bud now begins where the write-buffer currently stands. */
317 bud->start = wbuf->offs;
/* Closed bud: everything from its start to the end of the LEB is committed. */
319 c->cmt_bud_bytes += c->leb_size - bud->start;
320 dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
321 bud->lnum, bud->start, dbg_jhead(bud->jhead),
322 c->leb_size - bud->start, c->cmt_bud_bytes);
323 rb_erase(p1, &c->buds);
325 * If the commit does not finish, the recovery will need
326 * to replay the journal, in which case the old buds
327 * must be unchanged. Do not release them until post
328 * commit i.e. do not allow them to be garbage
/* Moves the bud off its journal head list and onto c->old_buds. */
331 list_move(&bud->list, &c->old_buds);
334 spin_unlock(&c->buds_lock);
338 * ubifs_log_start_commit - start commit.
339 * @c: UBIFS file-system description object
340 * @ltail_lnum: return new log tail LEB number
342 * The commit operation starts with writing "commit start" node to the log and
343 * reference nodes for all journal heads which will define new journal after
344 * the commit has been finished. The commit start and reference nodes are
345 * written in one go to the nearest empty log LEB (hence, when commit is
346 * finished UBIFS may safely unmap all the previous log LEBs). This function
347 * returns zero in case of success and a negative error code in case of
/*
 * Begin a commit in the log. The visible steps are:
 *  - run the bud-bytes self check;
 *  - allocate one buffer large enough for a commit-start node plus one
 *    reference node per journal head, rounded up to c->min_io_size;
 *  - build the commit-start node with the current commit number;
 *  - append a reference node for each journal head whose write-buffer has a
 *    LEB (lnum is not -1) that is not completely full (offs is not
 *    c->leb_size);
 *  - pad the buffer to the I/O unit, move the log head to the next log LEB,
 *    unmap that LEB and write the buffer at offset 0;
 *  - report the LEB just written as the new log tail through ltail_lnum,
 *    advance the head offset, and reset c->min_log_bytes to 0.
 * NOTE(review): error handling, the assignment of the ref pointer, the
 * condition around the LEB switch, the call that prunes old buds and the
 * return statements are not visible in this listing.
 */
350 int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
353 struct ubifs_cs_node *cs;
354 struct ubifs_ref_node *ref;
355 int err, i, max_len, len;
357 err = dbg_check_bud_bytes(c);
/* Worst case: every journal head contributes one reference node. */
361 max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
362 max_len = ALIGN(max_len, c->min_io_size);
/* buf and cs alias the same allocation; the CS node sits at its start. */
363 buf = cs = kmalloc(max_len, GFP_NOFS);
367 cs->ch.node_type = UBIFS_CS_NODE;
368 cs->cmt_no = cpu_to_le64(c->cmt_no);
369 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
372 * Note, we do not lock 'c->log_mutex' because this is the commit start
373 * phase and we are exclusively using the log. And we do not lock
374 * write-buffer because nobody can write to the file-system at this
/* len tracks how many bytes of the buffer are in use so far. */
378 len = UBIFS_CS_NODE_SZ;
379 for (i = 0; i < c->jhead_cnt; i++) {
380 int lnum = c->jheads[i].wbuf.lnum;
381 int offs = c->jheads[i].wbuf.offs;
/* Heads without a LEB, or with a completely full LEB, get no reference. */
383 if (lnum == -1 || offs == c->leb_size)
386 dbg_log("add ref to LEB %d:%d for jhead %s",
387 lnum, offs, dbg_jhead(i));
/* ref presumably points at buf + len - the assignment is not visible here. */
389 ref->ch.node_type = UBIFS_REF_NODE;
390 ref->lnum = cpu_to_le32(lnum);
391 ref->offs = cpu_to_le32(offs);
392 ref->jhead = cpu_to_le32(i);
394 ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
395 len += UBIFS_REF_NODE_SZ;
/* Fill the gap up to the next min_io_size boundary with padding. */
398 ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
400 /* Switch to the next log LEB */
402 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
403 ubifs_assert(c->lhead_lnum != c->ltail_lnum);
407 /* Must ensure next LEB has been unmapped */
408 err = ubifs_leb_unmap(c, c->lhead_lnum);
412 len = ALIGN(len, c->min_io_size);
413 dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
414 err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len);
/* The LEB holding the commit-start node becomes the new log tail. */
418 *ltail_lnum = c->lhead_lnum;
420 c->lhead_offs += len;
/* The write filled the whole LEB: continue in the following log LEB. */
421 if (c->lhead_offs == c->leb_size) {
422 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
429 * We have started the commit and now users may use the rest of the log
432 c->min_log_bytes = 0;
440 * ubifs_log_end_commit - end commit.
441 * @c: UBIFS file-system description object
442 * @ltail_lnum: new log tail LEB number
444 * This function is called when the commit operation has finished. It
445 * moves log tail to new position and updates the master node so that it stores
446 * the new log tail LEB number. Returns zero in case of success and a negative
447 * error code in case of failure.
/*
 * Finish a commit: under c->log_mutex, set the log tail to ltail_lnum,
 * restore the reservation c->min_log_bytes to one LEB, subtract the bytes
 * covered by the commit (c->cmt_bud_bytes) from c->bud_bytes while holding
 * c->buds_lock, run the bud-bytes self check and write the master node.
 * NOTE(review): the declaration of err, the handling of a failed check and
 * the return statement are not visible in this listing.
 */
449 int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
454 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
455 * writes during commit. Its only short "commit" start phase when
456 * writers are blocked.
458 mutex_lock(&c->log_mutex);
460 dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
461 c->ltail_lnum, ltail_lnum);
463 c->ltail_lnum = ltail_lnum;
465 * The commit is finished and from now on it must be guaranteed that
466 * there is always enough space for the next commit.
468 c->min_log_bytes = c->leb_size;
/* The committed bud space no longer counts against the journal size. */
470 spin_lock(&c->buds_lock);
471 c->bud_bytes -= c->cmt_bud_bytes;
472 spin_unlock(&c->buds_lock);
474 err = dbg_check_bud_bytes(c);
/* Write the master node; per the function header it records the new tail. */
478 err = ubifs_write_master(c);
481 mutex_unlock(&c->log_mutex);
486 * ubifs_log_post_commit - things to do after commit is completed.
487 * @c: UBIFS file-system description object
488 * @old_ltail_lnum: old log tail LEB number
490 * Release buds only after commit is completed, because they must be unchanged
491 * if recovery is needed.
493 * Unmap log LEBs only after commit is completed, because they may be needed for
496 * This function returns %0 on success and a negative error code on failure.
/*
 * Post-commit cleanup in two phases. First, drain c->old_buds: each bud LEB
 * is handed to ubifs_return_leb() and the bud is unlinked from the list.
 * Second, with c->log_mutex held, unmap every log LEB from old_ltail_lnum up
 * to, but not including, the current tail c->ltail_lnum, stepping with
 * ubifs_next_log_lnum().
 * NOTE(review): the local declarations, the error checks after each call,
 * the release of the bud memory and the return statement are not visible in
 * this listing.
 */
498 int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
502 while (!list_empty(&c->old_buds)) {
503 struct ubifs_bud *bud;
/* Always take the first entry; it is removed below, so the loop terminates. */
505 bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
506 err = ubifs_return_leb(c, bud->lnum);
509 list_del(&bud->list);
512 mutex_lock(&c->log_mutex);
513 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
514 lnum = ubifs_next_log_lnum(c, lnum)) {
515 dbg_log("unmap log LEB %d", lnum);
516 err = ubifs_leb_unmap(c, lnum);
521 mutex_unlock(&c->log_mutex);
526 * struct done_ref - references that have been done.
536 * done_already - determine if a reference has been done already.
537 * @done_tree: rb-tree to store references that have been done
538 * @lnum: LEB number of reference
540 * This function returns %1 if the reference has been done, %0 if not, otherwise
541 * a negative error code is returned.
/*
 * Check whether LEB number lnum is already recorded in done_tree, a
 * red-black tree of struct done_ref entries compared by their lnum field.
 * When the walk ends without a match, a zeroed done_ref is allocated with
 * GFP_NOFS and linked into the tree at the position the walk reached.
 * NOTE(review): the loop header, the first comparison, the allocation
 * failure check, the store of lnum into the new entry and the return
 * statements are not visible in this listing.
 */
543 static int done_already(struct rb_root *done_tree, int lnum)
/* p is the link to follow next; parent is the node that link belongs to. */
545 struct rb_node **p = &done_tree->rb_node, *parent = NULL;
550 dr = rb_entry(parent, struct done_ref, rb);
/* Wanted LEB number is above this entry - presumably step to the right subtree. */
553 else if (lnum > dr->lnum)
/* No entry matched: create one so a later lookup of this lnum can find it. */
559 dr = kzalloc(sizeof(struct done_ref), GFP_NOFS);
/* Attach the new entry at the slot found by the walk, then rebalance. */
565 rb_link_node(&dr->rb, parent, p);
566 rb_insert_color(&dr->rb, done_tree);
572 * destroy_done_tree - destroy the done tree.
573 * @done_tree: done tree to destroy
/*
 * Visit every struct done_ref in done_tree with the safe post-order
 * iterator, which keeps the next entry in n while dr is processed.
 * NOTE(review): the loop body is not visible in this listing; presumably it
 * frees each entry - confirm against the full file.
 */
575 static void destroy_done_tree(struct rb_root *done_tree)
577 struct done_ref *dr, *n;
579 rbtree_postorder_for_each_entry_safe(dr, n, done_tree, rb)
584 * add_node - add a node to the consolidated log.
585 * @c: UBIFS file-system description object
586 * @buf: buffer to which to add
587 * @lnum: LEB number to which to write is passed and returned here
588 * @offs: offset to where to write is passed and returned here
591 * This function returns %0 on success and a negative error code on failure.
/*
 * Append one node to the in-memory image buf of the log LEB being rebuilt.
 * The node length is read from its common header (little-endian). In the
 * flush branch the image is padded up to c->min_io_size, written to LEB
 * *lnum with ubifs_leb_change(), and *lnum moves to the next log LEB. The
 * node is then copied into buf at *offs and *offs advances by the node
 * length rounded up to 8 bytes.
 * NOTE(review): the rest of the parameter list (which declares node), the
 * condition that selects the flush branch (remains is presumably compared
 * with len), the reset of *offs after a flush and the return statements are
 * not visible in this listing.
 */
593 static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
596 struct ubifs_ch *ch = node;
/* remains: bytes left in the image between *offs and the end of the LEB. */
597 int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;
/* sz: used part of the image rounded up to a whole I/O unit. */
600 int sz = ALIGN(*offs, c->min_io_size), err;
602 ubifs_pad(c, buf + *offs, sz - *offs);
603 err = ubifs_leb_change(c, *lnum, buf, sz);
606 *lnum = ubifs_next_log_lnum(c, *lnum);
609 memcpy(buf + *offs, node, len);
610 *offs += ALIGN(len, 8);
615 * ubifs_consolidate_log - consolidate the log.
616 * @c: UBIFS file-system description object
618 * Repeated failed commits could cause the log to be full, but at least 1 LEB is
619 * needed for commit. This function rewrites the reference nodes in the log
620 * omitting duplicates, and failed CS nodes, and leaving no gaps.
622 * This function returns %0 on success and a negative error code on failure.
/*
 * Rewrite the log compactly. The visible steps are:
 *  - allocate an image buffer of one LEB with vmalloc();
 *  - scan the log LEBs starting at c->ltail_lnum and stopping after
 *    c->lhead_lnum has been scanned;
 *  - for each reference node, ask done_already() about the referenced LEB
 *    and append the node to the image with add_node(); a second add_node()
 *    call handles another node type (presumably the commit-start node);
 *  - flush whatever remains in the image with ubifs_leb_change();
 *  - fail with the message about the log being too full when the write
 *    position has reached the head LEB;
 *  - unmap the log LEBs that follow, up to and including the old head LEB,
 *    and set the log head to the new write position.
 * The trailing ubifs_scan_destroy() and destroy_done_tree() calls are
 * presumably the error-exit path.
 * NOTE(review): loop headers, case labels, error checks, the initial value
 * of write_lnum, the buffer release and the return statements are not
 * visible in this listing.
 */
624 int ubifs_consolidate_log(struct ubifs_info *c)
626 struct ubifs_scan_leb *sleb;
627 struct ubifs_scan_node *snod;
/* Tracks which bud LEB numbers already have a reference in the new log. */
628 struct rb_root done_tree = RB_ROOT;
629 int lnum, err, first = 1, write_lnum, offs = 0;
632 dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
634 buf = vmalloc(c->leb_size);
637 lnum = c->ltail_lnum;
/* Scan one log LEB into a node list, using c->sbuf as the scan buffer. */
640 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
645 list_for_each_entry(snod, &sleb->nodes, list) {
646 switch (snod->type) {
647 case UBIFS_REF_NODE: {
648 struct ubifs_ref_node *ref = snod->node;
649 int ref_lnum = le32_to_cpu(ref->lnum);
/* The result decides whether this reference is a duplicate. */
651 err = done_already(&done_tree, ref_lnum);
655 err = add_node(c, buf, &write_lnum,
665 err = add_node(c, buf, &write_lnum, &offs,
673 ubifs_scan_destroy(sleb);
/* The head LEB is the last one to scan. */
674 if (lnum == c->lhead_lnum)
676 lnum = ubifs_next_log_lnum(c, lnum);
/* Flush the partly filled image, padded to a whole I/O unit. */
679 int sz = ALIGN(offs, c->min_io_size);
681 ubifs_pad(c, buf + offs, sz - offs);
682 err = ubifs_leb_change(c, write_lnum, buf, sz);
685 offs = ALIGN(offs, c->min_io_size);
687 destroy_done_tree(&done_tree);
/* Consolidation did not free any LEB if the write position reached the head. */
689 if (write_lnum == c->lhead_lnum) {
690 ubifs_err(c, "log is too full");
693 /* Unmap remaining LEBs */
696 lnum = ubifs_next_log_lnum(c, lnum);
697 err = ubifs_leb_unmap(c, lnum);
700 } while (lnum != c->lhead_lnum);
/* The log head now sits right after the consolidated data. */
701 c->lhead_lnum = write_lnum;
702 c->lhead_offs = offs;
703 dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
707 ubifs_scan_destroy(sleb);
709 destroy_done_tree(&done_tree);
715 * dbg_check_bud_bytes - make sure bud bytes calculations are all right.
716 * @c: UBIFS file-system description object
718 * This function makes sure the amount of flash space used by closed buds
719 * ('c->bud_bytes') is correct. Returns zero in case of success and %-EINVAL in
/*
 * Debug self check for c->bud_bytes. When general checks are enabled
 * (dbg_is_chk_gen), the function recomputes the total under c->buds_lock by
 * summing, for every bud on every journal head list, the space from the bud
 * start offset to the end of its LEB, and reports a mismatch with
 * ubifs_err().
 * NOTE(review): the declarations of i and err, the early return when checks
 * are disabled, the error code assignment and the final return are not
 * visible in this listing.
 */
722 static int dbg_check_bud_bytes(struct ubifs_info *c)
725 struct ubifs_bud *bud;
726 long long bud_bytes = 0;
/* Checks disabled: presumably return right away without touching the lock. */
728 if (!dbg_is_chk_gen(c))
731 spin_lock(&c->buds_lock);
732 for (i = 0; i < c->jhead_cnt; i++)
733 list_for_each_entry(bud, &c->jheads[i].buds_list, list)
/* Same per-bud amount that ubifs_add_bud() adds to c->bud_bytes. */
734 bud_bytes += c->leb_size - bud->start;
736 if (c->bud_bytes != bud_bytes) {
737 ubifs_err(c, "bad bud_bytes %lld, calculated %lld",
738 c->bud_bytes, bud_bytes);
741 spin_unlock(&c->buds_lock);