2 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
3 // +----------------------------------------------------------------------+
5 // +----------------------------------------------------------------------+
6 // | Copyright (c) 1997-2002 The PHP Group |
7 // +----------------------------------------------------------------------+
8 // | This source file is subject to version 2.0 of the PHP license, |
9 // | that is bundled with this package in the file LICENSE, and is |
10 // | available at through the world-wide-web at |
11 // | http://www.php.net/license/2_02.txt. |
12 // | If you did not receive a copy of the PHP license and are unable to |
13 // | obtain it through the world-wide-web, please send a note to |
14 // | license@php.net so we can mail you a copy immediately. |
15 // +----------------------------------------------------------------------+
16 // | Authors: Paul M. Jones <pmjones@ciaweb.net> |
17 // +----------------------------------------------------------------------+
19 // $Id: Contact_Vcard_Parse.php,v 1.1 2004/06/01 08:48:59 gohr Exp $
26 * This class parses vCard 2.1 and 3.0 sources from file or text into a
32 * // include this class file
33 * require_once 'Contact_Vcard_Parse.php';
35 * // instantiate a parser object
36 * $parse = new Contact_Vcard_Parse();
38 * // parse a vCard file and store the data
40 * $cardinfo = $parse->fromFile('sample.vcf');
42 * // view the card info array
49 * @author Paul M. Jones <pmjones@ciaweb.net>
51 * @package Contact_Vcard_Parse
57 class Contact_Vcard_Parse {
/**
 * Loads a vCard source file and parses it.
 *
 * The file is read through fileGetContents() and the resulting text is
 * handed on to fromText().
 *
 * @access public
 *
 * @param string $filename The filename to read for vCard information.
 *
 * @param bool $decode_qp Passed through unchanged to fromText().
 *
 * @return array|bool The value returned by fromText(), or boolean
 * false when fileGetContents() reports that the file could not be read.
 *
 * @see Contact_Vcard_Parse::fileGetContents()
 *
 * @see Contact_Vcard_Parse::fromText()
 *
 * @see Contact_Vcard_Parse::_fromArray()
 */
function fromFile($filename, $decode_qp = true)
{
    $contents = $this->fileGetContents($filename);

    // the strict comparison keeps an empty (but readable) file distinct
    // from a failed read.
    if ($contents !== false) {
        // hand the text to fromText() and pass its result straight back.
        return $this->fromText($contents, $decode_qp);
    }

    return false;
}
/**
 * Reads the contents of a file. Included for users whose PHP < 4.3.0.
 *
 * The file is read in fixed-size chunks until end-of-file, so empty
 * files and files whose content is the single character "0" are
 * returned correctly.
 *
 * @access public
 *
 * @param string $filename The filename to read for vCard information.
 *
 * @return string|bool The contents of the file if it exists and is
 * readable, or boolean false if not.
 *
 * @see Contact_Vcard_Parse::fromFile()
 */
function fileGetContents($filename)
{
    if (! file_exists($filename) || ! is_readable($filename)) {
        return false;
    }

    // binary mode: line endings are normalised later by the parser, so
    // no platform translation is wanted here.
    $fp = fopen($filename, 'rb');
    if ($fp === false) {
        return false;
    }

    $text = '';
    while (! feof($fp)) {
        $chunk = fread($fp, 8192);

        // stop on a read error, and on an empty read that did not reach
        // end-of-file, so the loop can never spin forever.
        if ($chunk === false || $chunk === '') {
            break;
        }

        $text .= $chunk;
    }
    fclose($fp);

    return $text;
}
/**
 * Prepares a block of text for parsing, then sends it through and
 * returns the results from $this->_fromArray().
 *
 * Preparation steps, in order: line endings are normalised to "\n",
 * folded lines are unfolded, NUL bytes are removed, and the text is
 * split into an array of lines.
 *
 * @access public
 *
 * @param string $text A block of text to read for vCard information.
 *
 * @param bool $decode_qp Passed through unchanged to _fromArray().
 *
 * @return array The value returned by _fromArray() for the prepared
 * lines.
 *
 * @see Contact_Vcard_Parse::convertLineEndings()
 *
 * @see Contact_Vcard_Parse::_fromArray()
 */
function fromText($text, $decode_qp = true)
{
    // convert DOS (\r\n) and Mac (\r) line endings to Unix-standard (\n).
    $this->convertLineEndings($text);

    // unfold lines. concat two lines where line 1 ends in \n and
    // line 2 starts with a space or a tab. only the first whitespace
    // character is removed; any others are left in place. the class
    // holds exactly those two characters: a '|' inside a character
    // class is a literal pipe, not an alternation, and must not be
    // treated as a fold marker.
    $text = preg_replace('/\n[ \t]/', '', $text);

    // massage for Macintosh OS X Address Book (remove nulls that
    // Address Book puts in for unicode chars)
    $text = str_replace("\x00", '', $text);

    // convert the resulting text to an array of lines
    $lines = explode("\n", $text);

    // parse the array of lines and return vCard info
    return $this->_fromArray($lines, $decode_qp);
}
/**
 * Converts line endings in text.
 *
 * Rewrites every DOS (\r\n) and Mac (\r) line ending in the given text
 * block as a UNIX (\n) line ending.
 *
 * NOTE: Acts on the text block in-place; does not return a value.
 *
 * @access public
 *
 * @param string $text The string on which to convert line endings.
 *
 * @return void
 */
function convertLineEndings(&$text)
{
    // str_replace() works through the search list in order, so DOS
    // pairs are collapsed first and only the remaining bare carriage
    // returns are converted afterwards.
    $text = str_replace(array("\r\n", "\r"), "\n", $text);
}
/**
 * Splits a string into an array at semicolons. Honors backslash-
 * escaped semicolons (i.e., splits at ';' not '\;').
 *
 * @access public
 *
 * @param string $text The string to split into an array.
 *
 * @param bool $convertSingle If splitting the string results in a
 * single array element, return a string instead of a one-element
 * array.
 *
 * @return mixed An array of values, or a single string.
 */
function splitBySemi($text, $convertSingle = false)
{
    // the four backslashes in this single-quoted PHP string reach the
    // regex engine as two, which is how a regex spells one literal
    // backslash. the lookbehind therefore rejects any ';' that is
    // directly preceded by a backslash.
    $parts = preg_split('/(?<!\\\\);/', $text);

    // a lone element is handed back as a plain string when the caller
    // asked for that.
    if ($convertSingle && count($parts) == 1) {
        return $parts[0];
    }

    return $parts;
}
/**
 * Splits a string into an array at commas. Honors backslash-
 * escaped commas (i.e., splits at ',' not '\,').
 *
 * @access public
 *
 * @param string $text The string to split into an array.
 *
 * @param bool $convertSingle If splitting the string results in a
 * single array element, return a string instead of a one-element
 * array.
 *
 * @return mixed An array of values, or a single string.
 */
function splitByComma($text, $convertSingle = false)
{
    // the four backslashes in this single-quoted PHP string reach the
    // regex engine as two, which is how a regex spells one literal
    // backslash. the lookbehind therefore rejects any ',' that is
    // directly preceded by a backslash.
    $parts = preg_split('/(?<!\\\\),/', $text);

    // a lone element is handed back as a plain string when the caller
    // asked for that.
    if ($convertSingle && count($parts) == 1) {
        return $parts[0];
    }

    return $parts;
}
/**
 * Used to make string human-readable after being a vCard value.
 *
 * \: => :
 * \; => ;
 * \, => ,
 * literal \n or \N => newline
 *
 * Arrays are processed recursively, element by element. Each element
 * is unescaped through its own slot in the array so the result is
 * never lost on a by-value loop copy.
 *
 * NOTE: Acts on the value in-place; does not return a value.
 *
 * @access public
 *
 * @param mixed $text The text (or nested array of text) to unescape.
 *
 * @return void
 */
function unescape(&$text)
{
    if (is_array($text)) {
        foreach (array_keys($text) as $key) {
            $this->unescape($text[$key]);
        }
        return;
    }

    // single-quoted strings keep the backslash sequences literal;
    // only the replacement "\n" is a real newline. RFC 2426 allows
    // both \n and \N for an encoded newline, and some producers also
    // escape colons.
    $text = str_replace(
        array('\:', '\;', '\,', '\n', '\N'),
        array(':',  ';',  ',',  "\n", "\n"),
        $text
    );
}
/**
 * Emulated destructor.
 *
 * Performs no work of its own; it only reports success.
 *
 * @return boolean true
 */
function _Contact_Vcard_Parse()
{
    return true;
}
/**
 * Parses an array of source lines and returns an array of vCards.
 * Each element of the array is itself an array expressing the types,
 * parameters, and values of each part of the vCard. Processes both
 * 2.1 and 3.0 vCard sources.
 *
 * Only lines between a BEGIN:VCARD line and the matching END:VCARD
 * line are collected; blank lines and lines without a colon are
 * skipped. The complete result is passed through unescape() before it
 * is returned.
 *
 * NOTE(review): each collected entry is stored under the keys 'param'
 * and 'value', following the upstream PEAR Contact_Vcard_Parse layout.
 * Confirm against existing consumers of this array.
 *
 * @access private
 *
 * @param array $source An array of lines to be read for vCard
 * information.
 *
 * @param bool $decode_qp When true, values whose parameters declare
 * ENCODING=QUOTED-PRINTABLE are decoded; when false they are left as
 * found in the source.
 *
 * @return array An array of vCard information extracted from the
 * source array.
 */
function _fromArray($source, $decode_qp = true)
{
    // the info array will hold all resulting vCard information.
    $info = array();

    // tells us whether the source text indicates the beginning of a
    // new vCard with a BEGIN:VCARD tag.
    $begin = false;

    // holds information about the current vCard being read from the
    // source text.
    $card = array();

    // loop through each line in the source array
    foreach ($source as $line) {

        // if the line is blank, skip it.
        if (trim($line) == '') {
            continue;
        }

        // find the first instance of ':' on the line. The part
        // to the left of the colon is the type and parameters;
        // the part to the right of the colon is the value data.
        $pos = strpos($line, ':');

        // if there is no colon, skip the line.
        if ($pos === false) {
            continue;
        }

        // get the left and right portions
        $left  = trim(substr($line, 0, $pos));
        $right = trim(substr($line, $pos + 1));

        $isCardTag = (strtoupper($right) == 'VCARD');

        // have we started yet?
        if (! $begin) {

            // nope. does this line indicate the beginning of
            // a new card?
            if (strtoupper($left) == 'BEGIN' && $isCardTag) {
                $begin = true;
            }

            // regardless, loop to the next line of source. if begin
            // is still false, the next loop will check the line. if
            // begin has now been set to true, the loop will start
            // collecting card info.
            continue;
        }

        // yep, we've started. is this the ending line of the card?
        if (strtoupper($left) == 'END' && $isCardTag) {

            // keep the info from the current card...
            $info[] = $card;

            // ...and reset to grab a new card if one exists in
            // the source array.
            $begin = false;
            $card  = array();
            continue;
        }

        // we're not on an ending line, so collect info from this
        // line into the current card. split the left-portion of
        // the line into a type-definition (the kind of
        // information) and parameters for the type.
        $typedef = $this->_getTypeDef($left);
        $params  = $this->_getParams($left);

        // decode quoted-printable only when the caller asked for it.
        // QUOTED-PRINTABLE is not allowed in version 3.0, but we
        // don't check for versioning.
        if ($decode_qp) {
            $this->_decode_qp($params, $right);
        }

        // now get the value-data from the line, based on the
        // typedef. type names are compared case-insensitively.
        switch (strtoupper($typedef)) {

        case 'N':
            // structured name of the person
            $value = $this->_parseN($right);
            break;

        case 'ADR':
            // structured address of the person
            $value = $this->_parseADR($right);
            break;

        case 'NICKNAME':
            // nicknames
            $value = $this->_parseNICKNAME($right);
            break;

        case 'ORG':
            // organizations the person belongs to
            $value = $this->_parseORG($right);
            break;

        case 'CATEGORIES':
            // categories to which this card is assigned
            $value = $this->_parseCATEGORIES($right);
            break;

        case 'GEO':
            // geographic coordinates
            $value = $this->_parseGEO($right);
            break;

        default:
            // by default, just grab the plain value. keep
            // as an array to make sure *all* values are
            // arrays, for consistency.
            $value = array(array($right));
            break;
        }

        // add the type, parameters, and value to the current card
        // array. note that we allow multiple instances of the same
        // type, which might be dumb in some cases (e.g., N).
        $card[$typedef][] = array(
            'param' => $params,
            'value' => $value
        );
    }

    $this->unescape($info);
    return $info;
}
/**
 * Takes a vCard line and extracts the Type-Definition for the line.
 *
 * The type definition is the first semicolon-separated element of the
 * left part. It is trimmed and upper-cased so that it is handled the
 * same way as parameter keys in _getParams() and as the BEGIN/END
 * markers, which are also compared case-insensitively.
 *
 * @access private
 *
 * @param string $text A left-part (before-the-colon part) from a
 * vCard line.
 *
 * @return string The type definition for the line.
 */
function _getTypeDef($text)
{
    // split the text by semicolons
    $split = $this->splitBySemi($text);

    // only return first element (the typedef)
    return strtoupper(trim($split[0]));
}
/**
 * Finds the Type-Definition parameters for a vCard line.
 *
 * Parameters may be written in either of these forms:
 *
 * "TYPE=type1,type2,type3"
 * "TYPE=type1;TYPE=type2;TYPE=type3"
 *
 * or, in vCard 2.1, as a bare value with no name ("HOME"). In that
 * case the name is looked up with _getParamName() and the bare key is
 * stored as the value.
 *
 * @access private
 *
 * @param string $text A left-part (before-the-colon part) from a
 * vCard line.
 *
 * @return array An array of parameters: each key is a parameter name
 * and each value is the list of values collected for that name.
 */
function _getParams($text)
{
    // split the text by semicolons into an array
    $split = $this->splitBySemi($text);

    // drop the first element of the array (the type-definition)
    array_shift($split);

    // set up an array to retain the parameters, if any
    $params = array();

    foreach ($split as $full) {

        // split the full parameter at the FIRST equal sign only, so
        // that an '=' inside the parameter value is preserved.
        $tmp = explode('=', $full, 2);

        // the key is the left portion of the parameter (before
        // '='). if in 2.1 format, the key may in fact be the
        // parameter value, not the parameter name.
        $key = strtoupper(trim($tmp[0]));

        // get the parameter name by checking to see if it's in
        // vCard 2.1 or 3.0 format.
        $name = $this->_getParamName($key);

        // list of all parameter values. a bare 2.1-style parameter
        // has no '=' and therefore no second element.
        $listall = isset($tmp[1]) ? trim($tmp[1]) : '';

        // if there is a value-list for this parameter, they are
        // separated by commas, so split them out too.
        $list = $this->splitByComma($listall);

        // retain each value. a blank value means a 2.1-style param,
        // where the key itself is the value. the split always yields
        // at least one element, so every parameter name ends up with
        // at least one entry and no empty-list clean-up is needed.
        foreach ($list as $val) {
            $val = trim($val);
            if ($val !== '') {
                // 3.0 formatted parameter
                $params[$name][] = $val;
            } else {
                // 2.1 formatted parameter
                $params[$name][] = $key;
            }
        }
    }

    // return the parameters array.
    return $params;
}
/**
 * Looks at the parameters of a vCard line; if one of them is
 * ENCODING[] => QUOTED-PRINTABLE then decode the text in-place.
 *
 * The text is decoded at most once: the method returns as soon as the
 * first QUOTED-PRINTABLE encoding value has been found and applied.
 *
 * @access private
 *
 * @param array $params A parameter array from a vCard line.
 *
 * @param string $text A right-part (after-the-colon part) from a
 * vCard line.
 *
 * @return void
 */
function _decode_qp(&$params, &$text)
{
    foreach ($params as $name => $values) {

        // only encoding parameters are of interest here
        if (trim(strtoupper($name)) != 'ENCODING') {
            continue;
        }

        foreach ($values as $encoding) {
            if (trim(strtoupper($encoding)) != 'QUOTED-PRINTABLE') {
                continue;
            }

            // found it: decode in-place and stop looking.
            $text = quoted_printable_decode($text);
            return;
        }
    }
}
/**
 * Returns parameter names from 2.1-formatted vCards.
 *
 * The vCard 2.1 specification allows parameter values without a
 * name. The parameter name is then determined from the unique
 * parameter value.
 *
 * Shamelessly lifted from Frank Hellwig <frank@hellwig.org> and his
 * vCard PHP project <http://vcardphp.sourceforge.net>.
 *
 * @access private
 *
 * @param string $value The first element in a parameter name-value
 * pair.
 *
 * @return string The proper parameter name (TYPE, ENCODING, or
 * VALUE), or the given value itself when it is not one of the known
 * parameter values.
 */
function _getParamName($value)
{
    // known bare parameter values, grouped by the name they imply.
    // the groups are checked in this order.
    static $known = array(
        'TYPE' => array(
            'DOM', 'INTL', 'POSTAL', 'PARCEL','HOME', 'WORK',
            'PREF', 'VOICE', 'FAX', 'MSG', 'CELL', 'PAGER',
            'BBS', 'MODEM', 'CAR', 'ISDN', 'VIDEO',
            'AOL', 'APPLELINK', 'ATTMAIL', 'CIS', 'EWORLD',
            'INTERNET', 'IBMMAIL', 'MCIMAIL',
            'POWERSHARE', 'PRODIGY', 'TLX', 'X400',
            'GIF', 'CGM', 'WMF', 'BMP', 'MET', 'PMB', 'DIB',
            'PICT', 'TIFF', 'PDF', 'PS', 'JPEG', 'QTIME',
            'MPEG', 'MPEG2', 'AVI',
            'WAVE', 'AIFF', 'PCM',
            'X509', 'PGP'
        ),
        // CONTENT-ID added by pmj
        'VALUE' => array(
            'INLINE', 'URL', 'CID', 'CONTENT-ID'
        ),
        'ENCODING' => array(
            '7BIT', '8BIT', 'QUOTED-PRINTABLE', 'BASE64'
        )
    );

    foreach ($known as $name => $members) {
        if (in_array($value, $members)) {
            return $name;
        }
    }

    // changed by pmj so that the name defaults to whatever the
    // original value was. Frank Hellwig's original code used
    // 'UNKNOWN' here.
    return $value;
}
/**
 * Parses a vCard line value identified as being of the "N"
 * (structured name) type-definition.
 *
 * The value is split at semicolons into its five portions, and each
 * portion is split at commas. Portions missing from the source are
 * treated as empty, so the result always has five elements.
 *
 * @access private
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A five-element list in the order family, given,
 * additional, prefix, suffix. Each element is the array of
 * comma-separated values found for that portion.
 */
function _parseN($text)
{
    // pad to the full portion count so that short values such as
    // "Doe;John" do not read undefined offsets.
    $tmp = array_pad($this->splitBySemi($text), 5, '');

    return array(
        $this->splitByComma($tmp[0]), // family (last)
        $this->splitByComma($tmp[1]), // given (first)
        $this->splitByComma($tmp[2]), // addl (middle)
        $this->splitByComma($tmp[3]), // prefix
        $this->splitByComma($tmp[4])  // suffix
    );
}
/**
 * Parses a vCard line value identified as being of the "ADR"
 * (structured address) type-definition.
 *
 * The value is split at semicolons into its seven portions, and each
 * portion is split at commas. Portions missing from the source are
 * treated as empty, so the result always has seven elements.
 *
 * @access private
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A seven-element list in the order post-office box,
 * extended address, street, locality, region, postcode, country.
 * Each element is the array of comma-separated values found for that
 * portion.
 */
function _parseADR($text)
{
    // pad to the full portion count so that addresses with trailing
    // portions left off do not read undefined offsets.
    $tmp = array_pad($this->splitBySemi($text), 7, '');

    return array(
        $this->splitByComma($tmp[0]), // pob
        $this->splitByComma($tmp[1]), // extend
        $this->splitByComma($tmp[2]), // street
        $this->splitByComma($tmp[3]), // locality (city)
        $this->splitByComma($tmp[4]), // region (state)
        $this->splitByComma($tmp[5]), // postcode (ZIP)
        $this->splitByComma($tmp[6])  // country
    );
}
/**
 * Parses a vCard line value identified as being of the "NICKNAME"
 * (informal or descriptive name) type-definition.
 *
 * @access private
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A one-element array whose only element is the list
 * of comma-separated nicknames.
 */
function _parseNICKNAME($text)
{
    $nicknames = $this->splitByComma($text);

    // wrapped once more so the shape matches the other value types.
    return array($nicknames);
}
/**
 * Parses a vCard line value identified as being of the "ORG"
 * (organizational info) type-definition.
 *
 * @access private
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array An array of organizations, one per semicolon-separated
 * portion of the value; each element is itself a one-element array
 * holding that portion.
 */
function _parseORG($text)
{
    $units = array();

    // every semicolon-separated portion becomes its own single-value
    // array.
    foreach ($this->splitBySemi($text) as $unit) {
        $units[] = array($unit);
    }

    return $units;
}
/**
 * Parses a vCard line value identified as being of the "CATEGORIES"
 * (card-category) type-definition.
 *
 * @access private
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A one-element array whose only element is the list
 * of comma-separated categories.
 */
function _parseCATEGORIES($text)
{
    $categories = $this->splitByComma($text);

    // wrapped once more so the shape matches the other value types.
    return array($categories);
}
/**
 * Parses a vCard line value identified as being of the "GEO"
 * (geographic coordinate) type-definition.
 *
 * The value is split at the semicolon into latitude and longitude. A
 * portion missing from the source is treated as an empty string, so
 * the result always has two elements.
 *
 * @access private
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A two-element list: a one-element array holding the
 * latitude, then a one-element array holding the longitude.
 */
function _parseGEO($text)
{
    // pad so that a value without a semicolon does not read an
    // undefined offset for the longitude.
    $tmp = array_pad($this->splitBySemi($text), 2, '');

    return array(
        array($tmp[0]), // lat
        array($tmp[1])  // lon
    );
}