2 # -*- coding: utf-8 -*-
5 post.py parses post sources from the ./_posts directory.
8 __author__ = "Ryan McGuire (ryan@enigmacurry.com)"
9 __date__ = "Mon Feb 2 21:21:04 2009"
25 import blogofile_bf as bf
# Module-level logger registered under the "blogofile.post" name.
27 logger = logging.getLogger("blogofile.post")
# Shortcut to the blog controller's "post" configuration section.
29 config = bf.config.controllers.blog.post
# Store this module object on that configuration section as `mod`.
30 config.mod = sys.modules[globals()["__name__"]]
32 # These are all the Blogofile reserved field names for posts. It is not
33 # recommended that users re-use any of these field names for purposes other
34 # than the one stated.
# Maps each reserved post field name to a one-line, human-readable
# description. Long descriptions are built from adjacent string literals,
# so every fragment but the last must end with its own trailing space.
reserved_field_names = {
    "title": "A one-line free-form title for the post",
    "date": "The date that the post was originally created",
    "updated": "The date that the post was last updated",
    "categories": "A list of categories that the post pertains to, "
                  "each separated by commas",
    "tags": "A list of tags that the post pertains to, "
            "each separated by commas",
    "permalink": "The full permanent URL for this post. "
                 "Automatically created if not provided",
    "path": "The path from the permalink of the post",
    "guid": "A unique hash for the post, if not provided it "
            "is assumed that the permalink is the guid",
    "slug": "The title part of the URL for the post, if not "
            "provided it is automatically generated from the title. "
            "It is not used if permalink does not contain :title",
    "author": "The name of the author of the post",
    "filters": "The filter chain to apply to the entire post. "
               "If not specified, a default chain based on the file "
               "extension is applied. If set to 'None' it disables all "
               "filters, even default ones.",
    "filter": "synonym for filters",
    "draft": "If 'true' or 'True', the post is considered to be only a "
             "draft and not to be published.",
    "source": "Reserved internally",
    "yaml": "Reserved internally",
    "content": "Reserved internally",
    "filename": "Reserved internally",
}
class PostParseException(Exception):
    """Raised when a post source cannot be parsed.

    The message passed to the constructor is kept on ``value``, which is
    what handlers in this module read when logging a skipped post.
    """

    def __init__(self, value):
        # Forward to Exception so that ``args`` is populated as well.
        Exception.__init__(self, value)
        self.value = value

    # NOTE(review): the header of this method was not visible in the
    # reviewed text; __str__ is assumed from its repr()-returning body --
    # confirm.
    def __str__(self):
        return repr(self.value)
76 Class to describe a blog post and associated metadata
# Build a post from raw source text and the name of the file it came from.
# NOTE(review): only some statements of this constructor are visible here;
# the comments below describe just those.
78 def __init__(self, source, filename="Untitled"):
# Timezone setting from the blog controller config; passed to
# pytz.timezone() elsewhere in this class.
82 self.__timezone = bf.config.controllers.blog.timezone
# Categories start out as an empty set.
85 self.categories = set()
# File name of the post source ("Untitled" when the caller gives none).
90 self.filename = filename
99 def __repr__(self): #pragma: no cover
100 return u"<Post title='{0}' date='{1}'>".format(
101 self.title, self.date.strftime("%Y/%m/%d %H:%M:%S"))
# Splits self.source into its YAML header and body, then hands the header to
# __parse_yaml and the body to the filter and excerpt steps.
104 """Parse the yaml and fill fields"""
# A line consisting solely of "---" delimits the YAML section.
105 yaml_sep = re.compile("^---$", re.MULTILINE)
# maxsplit=2 yields at most three pieces: the text before the first
# separator, the YAML section, and everything after the second separator.
106 content_parts = yaml_sep.split(self.source, maxsplit=2)
107 if len(content_parts) < 2:
108 raise PostParseException(u"{0}: Post has no YAML section".format(
111 #Extract the yaml at the top
112 self.__parse_yaml(content_parts[1])
# NOTE(review): the guard above rejects only fewer than 2 pieces, yet index 2
# is read here. A source containing a single "---" line splits into exactly
# 2 pieces -- confirm that case cannot reach this line.
113 post_src = content_parts[2]
114 self.__apply_filters(post_src)
116 self.__parse_post_excerpting()
# Runs the post's filter chain over post_src and stores the result in
# self.content.
118 def __apply_filters(self, post_src):
119 """Apply filters to the post"""
120 #Apply block level filters (filters on only part of the post)
121 # TODO: block level filters on posts
122 #Apply post level filters (filters on the entire post)
123 #If filter is unspecified, use the default filter based on
125 if self.filters is None:
# Extension of the file name without its leading dot.
127 file_extension = os.path.splitext(self.filename)[-1][1:]
# The default chain is looked up in the blog controller's
# post_default_filters setting.
128 self.filters = bf.config.controllers.blog.post_default_filters[
132 self.content = bf.filter.run_chain(self.filters, post_src)
def __parse_post_excerpting(self):
    """Fill in self.excerpt when post excerpts are enabled in the config."""
    excerpt_cfg = bf.config.controllers.blog.post_excerpts
    if not excerpt_cfg.enabled:
        return
    word_limit = excerpt_cfg.word_length
    try:
        # A post_excerpt(content, num_words) callable on the config is
        # tried first.
        self.excerpt = bf.config.post_excerpt(self.content, word_limit)
    except AttributeError:
        # Otherwise use this class's own excerpt routine.
        self.excerpt = self.__excerpt(word_limit)
# Builds a plain-text excerpt of at most num_words space-separated pieces
# from self.content.
142 def __excerpt(self, num_words=50):
143 #Default post excerpting function
144 #Can be overridden in _config.py by
145 #defining post_excerpt(content,num_words)
# NOTE(review): the only statement visibly following this test is a bare
# string literal; indentation is not recoverable here, so it is unclear
# which of the statements below the condition is meant to guard -- confirm.
146 if len(self.excerpt) == 0:
147 """Retrieve excerpt from article"""
148 s = BeautifulSoup.BeautifulSoup(self.content)
149 # get rid of javascript, noscript and css
150 [[tree.extract() for tree in s(elem)] for elem in (
151 'script', 'noscript', 'style')]
# Extract every text node that matches "DOCTYPE" or "xml".
153 subtree = s.findAll(text=re.compile("DOCTYPE|xml"))
154 [tree.extract() for tree in subtree]
# Extract all heading elements, h1 through h6.
156 [[tree.extract() for tree in s(elem)] for elem in (
157 'h1', 'h2', 'h3', 'h4', 'h5', 'h6')]
# Concatenate the remaining text nodes, delete newlines, and split on single
# spaces.
158 text = ''.join(s.findAll(text=True))\
159 .replace("\n", "").split(" ")
# Keep the first num_words pieces; '...' is appended unconditionally.
160 return " ".join(text[:num_words]) + '...'
# Supplies default values for post fields and builds the permalink from the
# auto_permalink path template.
# NOTE(review): the conditions guarding several of these assignments, and
# the targets of some re.sub() calls, are not visible here.
162 def __post_process(self):
163 # fill in empty default value
# Default title: "Untitled - " followed by the current local timestamp.
165 self.title = u"Untitled - {0}".format(
166 datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
# Slug: the title with every space and '?' replaced by '-', lower-cased.
169 self.slug = re.sub("[ ?]", "-", self.title).lower()
# Current time in the configured blog timezone.
172 self.date = datetime.datetime.now(pytz.timezone(self.__timezone))
174 self.updated = self.date
# NOTE(review): `not self.categories` is already true for an empty set, so
# the len() test adds nothing.
176 if not self.categories or len(self.categories) == 0:
177 self.categories = set([Category('Uncategorized')])
# A permalink is generated only when none is set and auto_permalink is
# enabled. It starts as the site URL (trailing "/" stripped) plus the
# configured path template.
178 if not self.permalink and \
179 bf.config.controllers.blog.auto_permalink.enabled:
180 self.permalink = bf.config.site.url.rstrip("/") + \
181 bf.config.controllers.blog.auto_permalink.path
# Placeholder substitutions: :blog_path, :year, :month, :day, :title.
# NOTE(review): this reads bf.config.blog.path, whereas Category reads
# bf.config.controllers.blog.path -- confirm both refer to the same setting.
183 re.sub(":blog_path", bf.config.blog.path, self.permalink)
185 re.sub(":year", self.date.strftime("%Y"), self.permalink)
187 re.sub(":month", self.date.strftime("%m"), self.permalink)
189 re.sub(":day", self.date.strftime("%d"), self.permalink)
191 re.sub(":title", self.slug, self.permalink)
193 # TODO: slugification should be abstracted out somewhere reusable
194 self.permalink = re.sub(
196 "[ ?]", "-", self.filename).lower(), self.permalink)
198 # Generate sha hash based on title
199 self.permalink = re.sub(":uuid", hashlib.sha1(
200 self.title.encode('utf-8')).hexdigest(), self.permalink)
202 logger.debug(u"Permalink: {0}".format(self.permalink))
# Loads the YAML header text and copies its fields onto the post; a fixed set
# of fields gets special handling, the rest are set as plain attributes.
204 def __parse_yaml(self, yaml_src):
# NOTE(review): yaml.load can construct arbitrary Python objects. If post
# sources may come from untrusted authors, yaml.safe_load is the safer call.
205 y = yaml.load(yaml_src)
206 # Load all the fields that require special processing first:
207 fields_need_processing = ('permalink', 'guid', 'date', 'updated',
208 'categories', 'tags', 'draft')
210 self.permalink = y['permalink']
# A permalink starting with "/" is joined onto the configured site URL.
211 if self.permalink.startswith("/"):
212 self.permalink = urlparse.urljoin(bf.config.site.url,
214 #Ensure that the permalink is for the same site as bf.config.site.url
215 if not self.permalink.startswith(bf.config.site.url):
216 raise PostParseException(u"{0}: permalink for a different site"
217 " than configured".format(self.filename))
218 logger.debug(u"path from permalink: {0}".format(self.path))
222 self.guid = y['guid']
# The permalink doubles as the guid.
224 self.guid = self.permalink
# Dates are parsed with config.date_format and localized to the blog
# timezone.
226 self.date = pytz.timezone(self.__timezone).localize(
227 datetime.datetime.strptime(y['date'], config.date_format))
231 self.updated = pytz.timezone(self.__timezone).localize(
232 datetime.datetime.strptime(y['updated'], config.date_format))
# The comma-separated category string becomes a set of Category objects,
# each name stripped of surrounding whitespace.
236 self.categories = set([Category(x.strip()) for x in \
237 y['categories'].split(",")])
# The comma-separated tag string becomes a set of stripped strings.
241 self.tags = set([x.strip() for x in y['tags'].split(",")])
245 self.filters = y['filter'] #filter is a synonym for filters
251 logger.info(u"Post {0} is set to draft, "
252 "ignoring this post".format(self.filename))
257 # Load the rest of the fields that don't need processing:
258 for field, value in y.items():
259 if field not in fields_need_processing:
260 setattr(self,field,value)
263 """Get just the path portion of a permalink"""
# Index 2 of the urlparse() result tuple is the path component.
264 return urlparse.urlparse(self.permalink)[2]
def __cmp__(self, other_post):
    """Order posts by their date (Python 2 comparison protocol)."""
    mine = self.date
    theirs = other_post.date
    return cmp(mine, theirs)
270 def __eq__(self, other_post):
271 return self is other_post
273 def __getattr__(self, name):
275 #Always generate the path from the permalink
276 return self.permapath()
278 raise AttributeError, name
# A blog category. The visible equality, hash and ordering methods all work
# from the category's name.
281 class Category(object):
283 def __init__(self, name):
# The name is stored as a unicode string.
284 self.name = unicode(name)
285 # TODO: slugification should be abstracted out somewhere reusable
286 # TODO: consider making url_name and path read-only properties?
# URL form of the name: lower-cased, with spaces replaced by hyphens.
287 self.url_name = self.name.lower().replace(" ", "-")
# Path computed by bf.util.site_path_helper from the blog path and the
# category directory settings (any further arguments are not visible here).
288 self.path = bf.util.site_path_helper(
289 bf.config.controllers.blog.path,
290 bf.config.controllers.blog.category_dir,
# Equality is decided by comparing names.
293 def __eq__(self, other):
294 if self.name == other.name:
# Hash on the name, matching the name-based equality test.
299 return hash(self.name)
# Order categories by name.
304 def __cmp__(self, other):
305 return cmp(self.name, other.name)
308 def parse_posts(directory):
309 """Retrieve all the posts from the directory specified.
311 Returns a list of the posts sorted in reverse by date."""
# File names ending in one of these extensions are treated as posts.
313 post_filename_re = re.compile(
314 ".*((\.textile$)|(\.markdown$)|(\.org$)|(\.html$)|(\.txt$)|(\.rst$))")
# NOTE(review): this tests the literal "_posts" rather than the `directory`
# argument that is scanned below -- confirm that is intended.
# NOTE(review): logger.warn is a deprecated alias; logger.warning is used
# further down in this function.
315 if not os.path.isdir("_posts"):
316 logger.warn("This site has no _posts directory.")
# Matching paths under `directory`, each decoded from UTF-8.
318 post_paths = [f.decode("utf-8") for f in bf.util.recursive_file_list(
319 directory, post_filename_re) if post_filename_re.match(f)]
321 for post_path in post_paths:
# Only the file name (last path component) is handed to Post.
322 post_fn = os.path.split(post_path)[1]
323 logger.debug(u"Parsing post: {0}".format(post_path))
324 #IMO codecs.open is broken on Win32.
325 #It refuses to open files without replacing newlines with CR+LF
326 #reverting to regular open and decode:
# NOTE(review): the file object is never closed explicitly; a with-block
# would release the handle deterministically.
328 src = open(post_path, "r").read().decode(
329 bf.config.controllers.blog.post_encoding)
331 logger.exception(u"Error reading post: {0}".format(post_path))
334 p = Post(src, filename=post_fn)
335 except PostParseException as e:
336 logger.warning(u"{0} : Skipping this post.".format(e.value))
# Posts without a permalink, and drafts, fail this test.
339 if not (p.permalink is None or p.draft is True):
# Newest posts first.
341 posts.sort(key=operator.attrgetter('date'), reverse=True)