2 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
3 // +----------------------------------------------------------------------+
5 // +----------------------------------------------------------------------+
6 // | Copyright (c) 1997-2002 The PHP Group |
7 // +----------------------------------------------------------------------+
8 // | This source file is subject to version 2.0 of the PHP license, |
9 // | that is bundled with this package in the file LICENSE, and is |
10 // | available at through the world-wide-web at |
11 // | http://www.php.net/license/2_02.txt. |
12 // | If you did not receive a copy of the PHP license and are unable to |
13 // | obtain it through the world-wide-web, please send a note to |
14 // | license@php.net so we can mail you a copy immediately. |
15 // +----------------------------------------------------------------------+
16 // | Authors: Paul M. Jones <pmjones@php.net> |
17 // +----------------------------------------------------------------------+
19 // $Id: Contact_Vcard_Parse.php,v 1.4 2005/05/28 15:40:17 pmjones Exp $
26 * This class parses vCard 2.1 and 3.0 sources from file or text into a
32 * // include this class file
33 * require_once 'Contact_Vcard_Parse.php';
35 * // instantiate a parser object
36 * $parse = new Contact_Vcard_Parse();
38 * // parse a vCard file and store the data
40 * $cardinfo = $parse->fromFile('sample.vcf');
42 * // view the card info array
49 * @author Paul M. Jones <pmjones@php.net>
51 * @package Contact_Vcard_Parse
57 class Contact_Vcard_Parse {
/**
 * Loads a vCard source file and parses it.
 *
 * The file is read with $this->fileGetContents() and the resulting text
 * is handed to $this->fromText(), whose result is returned unchanged.
 *
 * @param string $filename  The filename to read for vCard information.
 * @param bool   $decode_qp Passed through unchanged to fromText().
 *
 * @return array|bool The value returned by fromText(), or boolean false
 * when the file contents could not be obtained.
 *
 * @see Contact_Vcard_Parse::fromText()
 * @see Contact_Vcard_Parse::_fromArray()
 */
function fromFile($filename, $decode_qp = true)
{
    $contents = $this->fileGetContents($filename);

    // fileGetContents() signals failure with a strict boolean false;
    // an empty string is still a valid (if empty) source.
    if ($contents === false) {
        return false;
    }

    // hand the text to fromText() and pass its result straight back.
    return $this->fromText($contents, $decode_qp);
}
/**
 * Reads the entire contents of a file into a string.
 *
 * This used to be a hand-rolled fopen()/fread() loop for PHP < 4.3.0.
 * That loop stopped on any falsy chunk (so a file containing only "0"
 * came back empty) and asked fread() for zero bytes on an empty file.
 * It now delegates to file_get_contents(), which has neither problem.
 *
 * @param string $filename The filename to read for vCard information.
 *
 * @return string|bool The contents of the file if it exists and is
 * readable, or boolean false if not.
 *
 * @see Contact_Vcard_Parse::fromFile()
 */
function fileGetContents($filename)
{
    // a directory passes file_exists() and is_readable() but has no
    // contents to return, so reject it together with missing or
    // unreadable paths.
    if (! file_exists($filename) ||
        ! is_readable($filename) ||
        is_dir($filename)) {
        return false;
    }

    // yields the data as a string (possibly empty), or boolean false
    // if the read itself fails.
    return file_get_contents($filename);
}
/**
 * Prepares a block of text for parsing, then sends it through and
 * returns the results from $this->_fromArray().
 *
 * Preparation steps, in order: line endings are normalised to "\n",
 * folded lines are unfolded, NUL bytes are stripped, and the text is
 * split into an array of lines.
 *
 * @param string $text      A block of text to read for vCard information.
 * @param bool   $decode_qp Passed through unchanged to _fromArray().
 *
 * @return array An array of vCard information extracted from the
 * source text.
 *
 * @see Contact_Vcard_Parse::_fromArray()
 */
function fromText($text, $decode_qp = true)
{
    // convert DOS (\r\n) and Mac (\r) line endings to Unix-standard
    // (\n). this works on $text in place.
    $this->convertLineEndings($text);

    // unfold lines: where a newline is followed by a space or a tab,
    // drop both so the two lines become one. only the first whitespace
    // character is removed; any others are left in place. (the
    // character class is exactly space and tab -- the earlier
    // "[ |\t]" also matched a literal '|' and so wrongly joined lines
    // that began with a pipe.)
    $text = preg_replace('/\n[ \t]/', '', $text);

    // massage for Macintosh OS X Address Book (remove nulls that
    // Address Book puts in for unicode chars)
    $text = str_replace("\x00", '', $text);

    // convert the resulting text to an array of lines
    $lines = explode("\n", $text);

    // parse the array of lines and return vCard info
    return $this->_fromArray($lines, $decode_qp);
}
/**
 * Normalises every line ending in a text block to the UNIX form.
 *
 * DOS line endings (\r\n) and Mac line endings (\r) both become a
 * single \n; existing \n characters are left alone.
 *
 * NOTE: Acts on the text block in-place; does not return a value.
 *
 * @param string $text The string on which to convert line endings.
 *
 * @return void
 */
function convertLineEndings(&$text)
{
    // str_replace() applies the searches in the order given, so the
    // DOS pairs are collapsed first and only the remaining lone "\r"
    // characters (Mac) are converted afterwards.
    $text = str_replace(array("\r\n", "\r"), "\n", $text);
}
/**
 * Splits a string into an array at semicolons. A semicolon that is
 * directly preceded by a backslash is treated as escaped and is not a
 * split point (i.e., splits at ';' but not at '\;'). The backslash is
 * kept in the resulting piece.
 *
 * @param string $text          The string to split into an array.
 * @param bool   $convertSingle If splitting yields exactly one piece,
 * return that piece as a string instead of a one-element array.
 *
 * @return mixed An array of values, or a single string.
 */
function splitBySemi($text, $convertSingle = false)
{
    // the four backslashes in this single-quoted PHP string reach PCRE
    // as two, which PCRE reads as one literal backslash. the pattern is
    // a negative lookbehind: ';' not preceded by '\'.
    $pieces = preg_split('/(?<!\\\\);/', $text);

    $onlyOne = (count($pieces) == 1);

    return ($convertSingle && $onlyOne) ? $pieces[0] : $pieces;
}
/**
 * Splits a string into an array at commas. A comma that is directly
 * preceded by a backslash is treated as escaped and is not a split
 * point (i.e., splits at ',' but not at '\,'). The backslash is kept
 * in the resulting piece.
 *
 * @param string $text          The string to split into an array.
 * @param bool   $convertSingle If splitting yields exactly one piece,
 * return that piece as a string instead of a one-element array.
 *
 * @return mixed An array of values, or a single string.
 */
function splitByComma($text, $convertSingle = false)
{
    // the four backslashes in this single-quoted PHP string reach PCRE
    // as two, which PCRE reads as one literal backslash. the pattern is
    // a negative lookbehind: ',' not preceded by '\'.
    $pieces = preg_split('/(?<!\\\\),/', $text);

    $onlyOne = (count($pieces) == 1);

    return ($convertSingle && $onlyOne) ? $pieces[0] : $pieces;
}
/**
 * Makes a vCard value human-readable by undoing its escaping.
 *
 * The following sequences are converted:
 *
 *   \:  => colon
 *   \;  => semicolon
 *   \,  => comma
 *   literal \n => newline
 *
 * Arrays are processed recursively, element by element.
 *
 * NOTE: Acts on the value in-place; does not return a value.
 *
 * NOTE(review): an escaped backslash ("\\") and the upper-case "\N"
 * form are not converted here -- confirm whether that is intended.
 *
 * @param mixed $text The text (or nested array of text) to unescape.
 *
 * @return void
 */
function unescape(&$text)
{
    if (is_array($text)) {
        // descend into each element by reference so that the nested
        // values are rewritten where they live.
        foreach (array_keys($text) as $key) {
            $this->unescape($text[$key]);
        }
        return;
    }

    // one pass over the string. none of the replacement characters can
    // create a new escape sequence, so this gives the same result as
    // applying the four replacements one after another.
    $text = strtr($text, array(
        '\:' => ':',
        '\;' => ';',
        '\,' => ',',
        '\n' => "\n"
    ));
}
/**
 * Emulated destructor.
 *
 * @return bool Always true.
 */
function _Contact_Vcard_Parse()
{
    return true;
}
/**
 * Parses an array of source lines and returns an array of vCards.
 * Each element of the array is itself an array expressing the types,
 * parameters, and values of each part of the vCard. Processes both
 * 2.1 and 3.0 vCard sources.
 *
 * Only lines between a BEGIN:VCARD line and the matching END:VCARD
 * line are collected; blank lines and lines without a colon are
 * skipped. A card that is never closed by END:VCARD is not returned.
 *
 * Each card maps an upper-cased type-definition (e.g. "N", "TEL") to a
 * list of occurrences; each occurrence is
 * array('param' => <parameters>, 'value' => <parsed value>).
 *
 * @param array $source    An array of lines to be read for vCard
 * information.
 * @param bool  $decode_qp When true (the default), values whose
 * parameters declare ENCODING=QUOTED-PRINTABLE are decoded. When false,
 * values are left exactly as they appear in the source.
 *
 * @return array An array of vCard information extracted from the
 * source array.
 */
function _fromArray($source, $decode_qp = true)
{
    // the info array will hold all resulting vCard information.
    $info = array();

    // tells us whether we are currently inside a BEGIN:VCARD ...
    // END:VCARD pair.
    $begin = false;

    // holds information about the current vCard being read from the
    // source text.
    $card = array();

    foreach ($source as $line) {

        // if the line is blank, skip it.
        if (trim($line) === '') {
            continue;
        }

        // find the first instance of ':' on the line. the part to the
        // left of the colon is the type and parameters; the part to
        // the right of the colon is the value data.
        $pos = strpos($line, ':');

        // if there is no colon, skip the line.
        if ($pos === false) {
            continue;
        }

        // get the left and right portions
        $left = trim(substr($line, 0, $pos));
        $right = trim(substr($line, $pos + 1));

        $marker = strtoupper($left);
        $isVcard = (strtoupper($right) === 'VCARD');

        // not inside a card yet: the only line of interest is the one
        // that opens a card. either way, move on to the next line.
        if (! $begin) {
            if ($marker === 'BEGIN' && $isVcard) {
                $begin = true;
            }
            continue;
        }

        // inside a card: the ending line stores the card collected so
        // far and resets the state for a possible next card.
        if ($marker === 'END' && $isVcard) {
            $info[] = $card;
            $begin = false;
            $card = array();
            continue;
        }

        // an ordinary content line. split the left portion into a
        // type-definition (the kind of information) and parameters
        // for the type.
        $typedef = $this->_getTypeDef($left);
        $params = $this->_getParams($left);

        // decode quoted-printable values only when the caller asked
        // for it. (this flag used to be ignored and decoding always
        // happened.) QUOTED-PRINTABLE is not allowed in version 3.0,
        // but we don't check for versioning.
        if ($decode_qp) {
            $this->_decode_qp($params, $right);
        }

        // now get the value-data from the line, based on the typedef
        switch ($typedef) {

        case 'N':
            // structured name of the person
            $value = $this->_parseN($right);
            break;

        case 'ADR':
            // structured address of the person
            $value = $this->_parseADR($right);
            break;

        case 'NICKNAME':
            // nicknames
            $value = $this->_parseNICKNAME($right);
            break;

        case 'ORG':
            // organizations the person belongs to
            $value = $this->_parseORG($right);
            break;

        case 'CATEGORIES':
            // categories to which this card is assigned
            $value = $this->_parseCATEGORIES($right);
            break;

        case 'GEO':
            // geographic coordinates
            $value = $this->_parseGEO($right);
            break;

        default:
            // by default, just grab the plain value. keep as an array
            // to make sure *all* values are arrays, for consistency.
            $value = array(array($right));
            break;
        }

        // add the parameters and value to the current card under the
        // type. note that we allow multiple instances of the same
        // type, which might be dumb in some cases (e.g., N).
        $card[$typedef][] = array(
            'param' => $params,
            'value' => $value
        );
    }

    // undo vCard escaping throughout the collected data, in place.
    $this->unescape($info);

    return $info;
}
/**
 * Extracts the Type-Definition from the left part of a vCard line.
 *
 * The left part is split at unescaped semicolons; the first piece is
 * the type-definition and is returned in upper case.
 *
 * @param string $text A left-part (before-the-colon part) from a
 * vCard line.
 *
 * @return string The type definition for the line.
 */
function _getTypeDef($text)
{
    $segments = $this->splitBySemi($text);

    // everything after the first segment is parameters; ignore it.
    $typedef = $segments[0];

    return strtoupper($typedef);
}
/**
 * Finds the Type-Definition parameters for a vCard line.
 *
 * Parameters may be written in vCard 3.0 style ("NAME=value", with an
 * optional comma-separated value list) or in vCard 2.1 style (a bare
 * value with no name, e.g. "HOME"). For a bare value, the parameter
 * name is looked up with $this->_getParamName().
 *
 * @param string $text A left-part (before-the-colon part) from a
 * vCard line.
 *
 * @return array An array of parameters: each key is a parameter name
 * and each value is a list of the values found for that name. Empty
 * when the line has no parameters.
 */
function _getParams($text)
{
    // split the text by semicolons into an array
    $split = $this->splitBySemi($text);

    // drop the first element of the array (the type-definition)
    array_shift($split);

    // set up an array to retain the parameters, if any
    $params = array();

    // loop through each parameter. the params may be in the format...
    // "TYPE=type1,type2,type3"
    //    ...or...
    // "TYPE=type1;TYPE=type2;TYPE=type3"
    foreach ($split as $full) {

        // split at the FIRST equal sign only, so that a value which
        // itself contains '=' is kept whole.
        $tmp = explode('=', $full, 2);

        // the key is the left portion of the parameter (before '=').
        // if in 2.1 format, the key may in fact be the parameter
        // value, not the parameter name.
        $key = strtoupper(trim($tmp[0]));

        // get the parameter name by checking to see if it's in
        // vCard 2.1 or 3.0 format.
        $name = $this->_getParamName($key);

        // list of all parameter values. a 2.1-style parameter has no
        // '=' and therefore no second element, so fall back to an
        // empty list instead of reading an undefined index.
        $listall = isset($tmp[1]) ? trim($tmp[1]) : '';

        // if there is a value-list for this parameter, they are
        // separated by commas, so split them out too.
        $list = $this->splitByComma($listall);

        // retain each value. a blank value means it's a 2.1-style
        // param, and the key itself is the value.
        foreach ($list as $val) {
            $val = trim($val);
            $params[$name][] = ($val !== '') ? $val : $key;
        }
    }

    // return the parameters array.
    return $params;
}
/**
 * Decodes a quoted-printable value when its parameters call for it.
 *
 * Looks through the parameters of a vCard line; if an ENCODING
 * parameter lists QUOTED-PRINTABLE among its values, the text is
 * decoded in-place. Otherwise the text is left untouched.
 *
 * @param array  $params A parameter array from a vCard line.
 * @param string $text   A right-part (after-the-colon part) from a
 * vCard line.
 *
 * @return void
 */
function _decode_qp(&$params, &$text)
{
    foreach ($params as $name => $values) {

        // only the encoding parameter is of interest here
        if (trim(strtoupper($name)) != 'ENCODING') {
            continue;
        }

        foreach ($values as $encoding) {

            // the first QP marker is enough: decode once and stop
            if (trim(strtoupper($encoding)) == 'QUOTED-PRINTABLE') {
                $text = quoted_printable_decode($text);
                return;
            }
        }
    }
}
/**
 * Returns parameter names from 2.1-formatted vCards.
 *
 * The vCard 2.1 specification allows parameter values without a
 * name. The parameter name is then determined from the value itself,
 * by looking it up in the lists of known values below.
 *
 * Shamelessly lifted from Frank Hellwig <frank@hellwig.org> and his
 * vCard PHP project <http://vcardphp.sourceforge.net>.
 *
 * @param string $value The first element in a parameter name-value
 * pair.
 *
 * @return string The proper parameter name (TYPE, ENCODING, or
 * VALUE), or the given value itself when it is not a known value.
 */
function _getParamName($value)
{
    // known bare values, grouped under the parameter name they imply.
    // the groups are checked in the order listed.
    static $known = array(
        'TYPE' => array(
            'DOM', 'INTL', 'POSTAL', 'PARCEL','HOME', 'WORK',
            'PREF', 'VOICE', 'FAX', 'MSG', 'CELL', 'PAGER',
            'BBS', 'MODEM', 'CAR', 'ISDN', 'VIDEO',
            'AOL', 'APPLELINK', 'ATTMAIL', 'CIS', 'EWORLD',
            'INTERNET', 'IBMMAIL', 'MCIMAIL',
            'POWERSHARE', 'PRODIGY', 'TLX', 'X400',
            'GIF', 'CGM', 'WMF', 'BMP', 'MET', 'PMB', 'DIB',
            'PICT', 'TIFF', 'PDF', 'PS', 'JPEG', 'QTIME',
            'MPEG', 'MPEG2', 'AVI',
            'WAVE', 'AIFF', 'PCM',
            'X509', 'PGP'
        ),
        // CONTENT-ID added by pmj
        'VALUE' => array(
            'INLINE', 'URL', 'CID', 'CONTENT-ID'
        ),
        'ENCODING' => array(
            '7BIT', '8BIT', 'QUOTED-PRINTABLE', 'BASE64'
        )
    );

    foreach ($known as $name => $members) {
        if (in_array($value, $members)) {
            return $name;
        }
    }

    // changed by pmj so that the name defaults to whatever the
    // original value was. Frank Hellwig's original code returned
    // 'UNKNOWN' here.
    return $value;
}
/**
 * Parses a vCard line value identified as being of the "N"
 * (structured name) type-definition.
 *
 * The value is split at unescaped semicolons into its portions, and
 * each portion is split again at unescaped commas.
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A five-element list, indexed by position:
 * 0 = family (last), 1 = given (first), 2 = additional (middle),
 * 3 = prefix, 4 = suffix. Each element is itself an array of the
 * comma-separated values found for that portion.
 */
function _parseN($text)
{
    // make sure there are always at least 5 portions to read
    $portions = array_pad($this->splitBySemi($text), 5, '');

    // only the first five positions are meaningful; anything beyond
    // them is ignored.
    $name = array();
    for ($i = 0; $i < 5; $i++) {
        $name[] = $this->splitByComma($portions[$i]);
    }

    return $name;
}
/**
 * Parses a vCard line value identified as being of the "ADR"
 * (structured address) type-definition.
 *
 * The value is split at unescaped semicolons into its portions, and
 * each portion is split again at unescaped commas.
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A seven-element list, indexed by position:
 * 0 = post office box, 1 = extended address, 2 = street,
 * 3 = locality (city), 4 = region (state), 5 = postcode (ZIP),
 * 6 = country. Each element is itself an array of the comma-separated
 * values found for that portion.
 */
function _parseADR($text)
{
    // make sure there are always at least 7 portions to read
    $portions = array_pad($this->splitBySemi($text), 7, '');

    // only the first seven positions are meaningful; anything beyond
    // them is ignored.
    $address = array();
    for ($i = 0; $i < 7; $i++) {
        $address[] = $this->splitByComma($portions[$i]);
    }

    return $address;
}
/**
 * Parses a vCard line value identified as being of the "NICKNAME"
 * (informal or descriptive name) type-definition.
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A one-element array whose only element is the list
 * of nicknames (the value split at unescaped commas).
 */
function _parseNICKNAME($text)
{
    $nicknames = $this->splitByComma($text);

    // wrapped once more so the shape matches the other value parsers
    return array($nicknames);
}
/**
 * Parses a vCard line value identified as being of the "ORG"
 * (organizational info) type-definition.
 *
 * The value is split at unescaped semicolons; each resulting piece is
 * wrapped in its own one-element array.
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array An array of organizations, in source order; each
 * element of the array is itself a one-element array holding that
 * piece of text.
 */
function _parseORG($text)
{
    $units = array();

    foreach ($this->splitBySemi($text) as $unit) {
        $units[] = array($unit);
    }

    return $units;
}
/**
 * Parses a vCard line value identified as being of the "CATEGORIES"
 * (card-category) type-definition.
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A one-element array whose only element is the list
 * of categories (the value split at unescaped commas).
 */
function _parseCATEGORIES($text)
{
    $categories = $this->splitByComma($text);

    // wrapped once more so the shape matches the other value parsers
    return array($categories);
}
/**
 * Parses a vCard line value identified as being of the "GEO"
 * (geographic coordinate) type-definition.
 *
 * The value is split at unescaped semicolons; the first piece is taken
 * as the latitude and the second as the longitude.
 *
 * @param string $text The right-part (after-the-colon part) of a
 * vCard line.
 *
 * @return array A two-element list: index 0 is a one-element array
 * holding the latitude, index 1 a one-element array holding the
 * longitude. A missing coordinate is an empty string.
 */
function _parseGEO($text)
{
    // make sure there are always at least 2 elements
    $coords = array_pad($this->splitBySemi($text), 2, '');

    $lat = $coords[0];
    $lon = $coords[1];

    return array(
        array($lat),
        array($lon)
    );
}