2 # -*- coding: utf-8 -*-
5 org.py converts an org source file into an HTML file
8 __author__ = "Jaemok Jeong(jmjeong@gmail.com)"
9 __date__ = "Tue Aug 11 12:50:17 2009"
21 from BeautifulSoup import BeautifulSoup
23 import blogofile_bf as bf
25 logger = logging.getLogger("blogofile.org")
28 class EmacsNotFoundException(Exception):
32 post = bf.config.controllers.blog.post.mod
37 Class to convert an org file into an HTML file
39 It composes the org content from the source, preamble, and postamble.
40 Launches Emacs and converts the org content into an HTML file.
42 The generated HTML file is processed with the BeautifulSoup module to
43 extract the body section, the title, and the categories.
46 self.title = title (the text of the first '*' heading in the org file)
47 self.categories = categories (the tags on the first '*' heading in the org file)
48 self.date = date (the timestamp on the first '*' heading in the org file)
51 def __init__(self, source):
53 return self.__convert()
56 temp_file = tempfile.NamedTemporaryFile(suffix='.org')
58 temp_file.write(bf.config.blog.emacs_orgmode_preamble)
60 except AttributeError:
62 temp_file.write(self.source.encode(bf.config.blog_post_encoding))
67 pname = bf.config.blog.emacs_binary
68 except AttributeError:
69 raise EmacsNotFoundException("Emacs binary is not defined")
73 if bf.config.blog.emacs_preload_elisp:
74 pname += " --load={0}".format(
75 bf.config.blog.emacs_preload_elisp)
76 except AttributeError:
79 pname += " --visit={0} --funcall org-export-as-html-batch".format(
81 logger.debug("Exec name::: %s" % pname)
83 status, output = commands.getstatusoutput(pname)
84 logger.debug("Convert output:::\n\t%s"%output)
86 raise EmacsNotFoundException("orgfile filter failed")
88 html = temp_file.name[:-4] + '.html'
91 # IMO codecs.open is broken on Win32:
92 # it refuses to open files without replacing newlines with CR+LF,
93 # so revert to a regular binary open followed by an explicit decode.
94 content = open(html, "rb").read().decode(bf.config.blog_post_encoding)
96 # remove the temporary file
99 soup = BeautifulSoup(content)
101 # the first h2 section will be used for title, category, and date
102 metaline = soup.find('div', {'id': 'outline-container-1'}).h2
106 self.title = re.sub(' ', '', metaline.contents[0]).strip()
107 except AttributeError:
112 categories = metaline('span', {'class':'tag'})[0].string
113 self.categories = set([post.Category(x)
114 for x in categories.split(' ')])
116 self.categories = None
120 date = metaline('span', {'class':'timestamp'})[0].string # 2009-08-22 Sat 15:22
121 # date_format = "%Y/%m/%d %H:%M:%S"
122 self.date = datetime.datetime.strptime(date, "%Y-%m-%d %a %H:%M")
123 self.date = self.date.replace(
124 tzinfo=pytz.timezone(bf.config.blog_timezone))
128 # delete first h2 section (which is title and category)
131 except AttributeError:
136 toc = soup.find('div',{'id': 'table-of-contents'})
137 content = soup.find('div', {'id': 'outline-container-1'})
140 content = str(toc) + str(content)
142 self.content = str(content).decode(bf.config.blog_post_encoding)
147 if __name__ == '__main__':
149 doctest.testmod(verbose=True)