ergoindemand
asked on
xml parsing in php4
I am trying to parse an xml file in php4, as seen below. I'm able to parse it via SAX; however, the whole file is being parsed, whereas I only need to parse a portion of it, depending on which product page is called via a browser. For example, instead of parsing the entire xml file, I only need it to parse the section containing <pageid>74-5002JEE11</pageid> and its siblings. This is for seo purposes. Any help would be greatly appreciated!
Carol
Carol
<?xml version="1.0" encoding="UTF-8"?>
<products xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<product xsi:type="ProductWithReviews" locale="en_US">
<pageid>74-5002JEE11</pageid>
<name>HON Park Avenue 5000 Series Mid-Back Managers Chair, Henna Cherry/Black Vinyl</name>
<smallstarimagelocation>pwr/engine/images/stars_small.gif</smallstarimagelocation>
<largestarimagelocation>pwr/engine/images/stars.gif</largestarimagelocation>
<newestreviewdate>2010-08-30</newestreviewdate>
<oldestreviewdate>2010-08-24</oldestreviewdate>
<averageoverallrating>5</averageoverallrating>
<average_rating_decimal>5</average_rating_decimal>
<fullreviews>2</fullreviews>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Computer chair</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="2">Midrange shopper</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="2">Business</tag>
</taggroup>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="2">Attractive</tag>
<tag isuseradded="false" count="2">Comfortable</tag>
<tag isuseradded="false" count="2">Durable</tag>
<tag isuseradded="false" count="1">Easy to assemble</tag>
<tag isuseradded="false" count="2">Ergonomic</tag>
<tag isuseradded="false" count="2">Good lumbar support</tag>
<tag isuseradded="false" count="2">Rolls smoothly</tag>
</taggroup>
<bottom_line_yes_votes>2</bottom_line_yes_votes>
<bottom_line_no_votes>0</bottom_line_no_votes>
<customerimages>false</customerimages>
<customervideos>false</customervideos>
<inlinefiles>
<inlinefile reviewpage="1">pwr/7gdt43ap/inline/06/30/74__5002JEE11-en_US-1-reviews.html</inlinefile>
</inlinefiles>
<reviews>
<fullreview>
<id>xxxxx</id>
<merchant_review_id>xxxxx</merchant_review_id>
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-24</createddate>
<helpfulvotes>0</helpfulvotes>
<nothelpfulvotes>0</nothelpfulvotes>
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<headline>test</headline>
<overallrating>5</overallrating>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Sturdy</tag>
<tag isuseradded="false" count="1">Rolls Smoothly</tag>
<tag isuseradded="false" count="1">Good Lumbar Support</tag>
<tag isuseradded="false" count="1">Comfortable</tag>
<tag isuseradded="false" count="1">Ergonomic</tag>
<tag isuseradded="false" count="1">Attractive Design</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange Shopper</tag>
</taggroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Computer Chair</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<bottom_line>recommended</bottom_line>
<comments>test</comments>
<nickname>carol</nickname>
<location>oregon</location>
<email_address_from_user>xxxxx@xxxxx.com</email_address_from_user>
<site_id>1</site_id>
</fullreview>
<fullreview>
<id>xxxxx</id>
<merchant_review_id>xxxxx</merchant_review_id>
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-30</createddate>
<helpfulvotes>0</helpfulvotes>
<nothelpfulvotes>0</nothelpfulvotes>
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<headline>An awesome chair.</headline>
<overallrating>5</overallrating>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Ergonomic</tag>
<tag isuseradded="false" count="1">Comfortable</tag>
<tag isuseradded="false" count="1">Good Lumbar Support</tag>
<tag isuseradded="false" count="1">Easy To Assemble</tag>
<tag isuseradded="false" count="1">Rolls Smoothly</tag>
<tag isuseradded="false" count="1">Attractive Design</tag>
<tag isuseradded="false" count="1">Sturdy</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange Shopper</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<bottom_line>recommended</bottom_line>
<comments>I bought this chair for my office and love it. I could sit in it all day.</comments>
<nickname>Gerry</nickname>
<location>Grants Pass, OR</location>
<email_address_from_user>xxxxx@xxxxx.com</email_address_from_user>
<site_id>1</site_id>
</fullreview>
</reviews>
</product>
<product xsi:type="ProductWithReviews" locale="en_US">
<pageid>14-PSD-TS-CTM-MS2</pageid>
<name>Dual Pole TV Floor Stand with Tilt Mount - for 37" to 63" Displays</name>
<smallstarimagelocation>pwr/engine/images/stars_small.gif</smallstarimagelocation>
<largestarimagelocation>pwr/engine/images/stars.gif</largestarimagelocation>
<newestreviewdate>2010-08-24</newestreviewdate>
<oldestreviewdate>2010-08-24</oldestreviewdate>
<averageoverallrating>5</averageoverallrating>
<average_rating_decimal>5</average_rating_decimal>
<fullreviews>1</fullreviews>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Space saver</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Stylish</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Attractive</tag>
<tag isuseradded="false" count="1">Easy to assemble</tag>
<tag isuseradded="false" count="1">Smooth edges</tag>
<tag isuseradded="false" count="1">Supports weight</tag>
</taggroup>
<bottom_line_yes_votes>1</bottom_line_yes_votes>
<bottom_line_no_votes>0</bottom_line_no_votes>
<customerimages>false</customerimages>
<customervideos>false</customervideos>
<inlinefiles>
<inlinefile reviewpage="1">pwr/7gdt43ap/inline/02/48/14__PSD__TS__CTM__MS2-en_US-1-reviews.html</inlinefile>
</inlinefiles>
<reviews>
<fullreview>
<id>xxxxx</id>
<merchant_review_id>xxxxx</merchant_review_id>
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-24</createddate>
<helpfulvotes>0</helpfulvotes>
<nothelpfulvotes>0</nothelpfulvotes>
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<headline>test</headline>
<overallrating>5</overallrating>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Attractive Design</tag>
<tag isuseradded="false" count="1">Smooth Edges</tag>
<tag isuseradded="false" count="1">Easy To Assemble</tag>
<tag isuseradded="false" count="1">Supports Weight</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Stylish</tag>
</taggroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Space Saver</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<bottom_line>recommended</bottom_line>
<comments>test</comments>
<nickname>carol</nickname>
<location>oregon</location>
<email_address_from_user>xxxxxl@xxxxx.com</email_address_from_user>
<site_id>1</site_id>
</fullreview>
</reviews>
</product>
<product xsi:type="ProductWithReviews" locale="en_US">
<pageid>18-9414AG-BU</pageid>
<name>Comfort Series Ergonomic Posture Chair - Blue</name>
<smallstarimagelocation>pwr/engine/images/stars_small.gif</smallstarimagelocation>
<largestarimagelocation>pwr/engine/images/stars.gif</largestarimagelocation>
<newestreviewdate>2010-08-24</newestreviewdate>
<oldestreviewdate>2010-08-24</oldestreviewdate>
<averageoverallrating>4</averageoverallrating>
<average_rating_decimal>4</average_rating_decimal>
<fullreviews>1</fullreviews>
<confirmstatusgroup>
<confirmstatus>Unverified</confirmstatus>
</confirmstatusgroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Computer chair</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange shopper</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Attractive</tag>
<tag isuseradded="false" count="1">Comfortable</tag>
<tag isuseradded="false" count="1">Durable</tag>
<tag isuseradded="false" count="1">Easy to assemble</tag>
<tag isuseradded="false" count="1">Ergonomic</tag>
<tag isuseradded="false" count="1">Good lumbar support</tag>
<tag isuseradded="false" count="1">Rolls smoothly</tag>
</taggroup>
<bottom_line_yes_votes>1</bottom_line_yes_votes>
<bottom_line_no_votes>0</bottom_line_no_votes>
<customerimages>false</customerimages>
<customervideos>false</customervideos>
<inlinefiles>
<inlinefile reviewpage="1">pwr/7gdt43ap/inline/09/12/18__9414AG__BU-en_US-1-reviews.html</inlinefile>
</inlinefiles>
<reviews>
<fullreview>
<id>xxxxx</id>
<merchant_review_id>xxxxx</merchant_review_id>
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-24</createddate>
<helpfulvotes>0</helpfulvotes>
<nothelpfulvotes>0</nothelpfulvotes>
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Unverified</confirmstatus>
</confirmstatusgroup>
<headline>test</headline>
<overallrating>4</overallrating>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Attractive Design</tag>
<tag isuseradded="false" count="1">Comfortable</tag>
<tag isuseradded="false" count="1">Sturdy</tag>
<tag isuseradded="false" count="1">Rolls Smoothly</tag>
<tag isuseradded="false" count="1">Ergonomic</tag>
<tag isuseradded="false" count="1">Easy To Assemble</tag>
<tag isuseradded="false" count="1">Good Lumbar Support</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange Shopper</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Computer Chair</tag>
</taggroup>
<bottom_line>recommended</bottom_line>
<comments>test</comments>
<nickname>carol</nickname>
<location>oregon</location>
<site_id>1</site_id>
</fullreview>
</reviews>
</product>
<product xsi:type="ProductWithReviews" locale="en_US">
<pageid>18-640LTL</pageid>
<name>VariTask LT Adjustable Corner Computer Workstation</name>
<smallstarimagelocation>pwr/engine/images/stars_small.gif</smallstarimagelocation>
<largestarimagelocation>pwr/engine/images/stars.gif</largestarimagelocation>
<newestreviewdate>2010-08-23</newestreviewdate>
<oldestreviewdate>2010-08-23</oldestreviewdate>
<averageoverallrating>5</averageoverallrating>
<average_rating_decimal>5</average_rating_decimal>
<fullreviews>1</fullreviews>
<confirmstatusgroup>
<confirmstatus>Unverified</confirmstatus>
</confirmstatusgroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Informal use</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange shopper</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Attractive</tag>
<tag isuseradded="false" count="1">Comfortable</tag>
<tag isuseradded="false" count="1">Well built / quality</tag>
</taggroup>
<bottom_line_yes_votes>1</bottom_line_yes_votes>
<bottom_line_no_votes>0</bottom_line_no_votes>
<customerimages>false</customerimages>
<customervideos>false</customervideos>
<inlinefiles>
<inlinefile reviewpage="1">pwr/7gdt43ap/inline/07/73/18__640LTL-en_US-1-reviews.html</inlinefile>
</inlinefiles>
<reviews>
<fullreview>
<id>xxxxx</id>
<merchant_review_id>xxxxx</merchant_review_id>
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-23</createddate>
<helpfulvotes>0</helpfulvotes>
<nothelpfulvotes>0</nothelpfulvotes>
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Unverified</confirmstatus>
</confirmstatusgroup>
<headline>Test</headline>
<overallrating>5</overallrating>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Quality Construction</tag>
<tag isuseradded="false" count="1">Comfortable</tag>
<tag isuseradded="false" count="1">Attractive Design</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange Shopper</tag>
</taggroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Informal Use</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<bottom_line>recommended</bottom_line>
<comments>test</comments>
<nickname>Carol</nickname>
<location>Oregon</location>
<site_id>1</site_id>
</fullreview>
</reviews>
</product>
<product xsi:type="ProductWithReviews" locale="en_US">
<pageid>74-VL601VA</pageid>
<name/>
<smallstarimagelocation>pwr/engine/images/stars_small.gif</smallstarimagelocation>
<largestarimagelocation>pwr/engine/images/stars.gif</largestarimagelocation>
<newestreviewdate>2010-08-24</newestreviewdate>
<oldestreviewdate>2010-08-24</oldestreviewdate>
<averageoverallrating>5</averageoverallrating>
<average_rating_decimal>5</average_rating_decimal>
<fullreviews>1</fullreviews>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Board meetings</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange shopper</tag>
</taggroup>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Attractive</tag>
<tag isuseradded="false" count="1">Comfortable</tag>
<tag isuseradded="false" count="1">Well built / quality</tag>
</taggroup>
<bottom_line_yes_votes>1</bottom_line_yes_votes>
<bottom_line_no_votes>0</bottom_line_no_votes>
<customerimages>false</customerimages>
<customervideos>false</customervideos>
<inlinefiles>
<inlinefile reviewpage="1">pwr/7gdt43ap/inline/07/80/74__VL601VA-en_US-1-reviews.html</inlinefile>
</inlinefiles>
<reviews>
<fullreview>
<id>xxxxx</id>
<merchant_review_id>xxxxx</merchant_review_id>
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-24</createddate>
<helpfulvotes>0</helpfulvotes>
<nothelpfulvotes>0</nothelpfulvotes>
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<headline>test</headline>
<overallrating>5</overallrating>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Comfortable</tag>
<tag isuseradded="false" count="1">Quality Construction</tag>
<tag isuseradded="false" count="1">Attractive Design</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange Shopper</tag>
</taggroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Board Meetings</tag>
</taggroup>
<bottom_line>recommended</bottom_line>
<comments>test</comments>
<nickname>Carol</nickname>
<location>Oregon</location>
<email_address_from_user>xxxxx@xxxxx.com</email_address_from_user>
<site_id>1</site_id>
</fullreview>
</reviews>
</product>
<product xsi:type="ProductWithReviews" locale="en_US">
<pageid>14-FLIP</pageid>
<name>Flip Down Under Cabinet TV Mount for 10" to 18" Displays</name>
<smallstarimagelocation>pwr/engine/images/stars_small.gif</smallstarimagelocation>
<largestarimagelocation>pwr/engine/images/stars.gif</largestarimagelocation>
<newestreviewdate>2010-08-24</newestreviewdate>
<oldestreviewdate>2010-08-24</oldestreviewdate>
<averageoverallrating>4</averageoverallrating>
<average_rating_decimal>4</average_rating_decimal>
<fullreviews>1</fullreviews>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Quality oriented</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Personal</tag>
</taggroup>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Compact</tag>
<tag isuseradded="false" count="1">Durable</tag>
</taggroup>
<bottom_line_yes_votes>1</bottom_line_yes_votes>
<bottom_line_no_votes>0</bottom_line_no_votes>
<customerimages>false</customerimages>
<customervideos>false</customervideos>
<inlinefiles>
<inlinefile reviewpage="1">pwr/7gdt43ap/inline/05/44/14__FLIP-en_US-1-reviews.html</inlinefile>
</inlinefiles>
<reviews>
<fullreview>
<id>xxxxxxxx</id>
<merchant_review_id>xxxxxxxx</merchant_review_id>
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-24</createddate>
<helpfulvotes>0</helpfulvotes>
<nothelpfulvotes>0</nothelpfulvotes>
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<headline>test</headline>
<overallrating>4</overallrating>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Durable</tag>
<tag isuseradded="false" count="1">Compact</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Quality Oriented</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Personal</tag>
</taggroup>
<bottom_line>recommended</bottom_line>
<comments>test</comments>
<nickname>carol</nickname>
<location>oregon</location>
<email_address_from_user>xxxx@xxxxxxxxx.com</email_address_from_user>
<site_id>1</site_id>
</fullreview>
</reviews>
</product>
</products>
You might want to consider moving to PHP5. PHP4 has been dead a long time. Not even security fixes are available any more. And with PHP5 you get some valuable new functions, like this:
http://us2.php.net/manual/en/function.simplexml-load-file.php
http://us2.php.net/manual/en/function.simplexml-load-file.php
ASKER
Awesome...thank you so much!
ASKER
And we are definitely considering moving to php5 in the near future. We are currently running CRE Loaded 6.3 for our store, so once we upgrade to the new version of CRE Loaded, we'll definitely be updating to php5.
ASKER
apresence, I'm unable to get your script to work. Here is the php4 script I'm using to parse the xml. Any suggestions...help? Thanks!
<?php
$file = "http://www.ergoindemand.com/pwr/7gdt43ap/rawdata/review_data_complete.xml";
/**
 * Decide whether PHP code embedded in a parsed document may be executed.
 *
 * A document is trusted only when its name has no URL scheme (i.e. it is a
 * local path), it exists, and it is owned by the same uid as this script.
 *
 * @param string $file Path or URL of the document being parsed.
 * @return bool True for an existing local file owned by us, false otherwise.
 */
function trustedFile($file)
{
    // Anything shaped like "scheme://..." is remote and never trusted.
    // preg_match() is used because eregi() no longer exists as of PHP 7.
    if (preg_match('#^[a-z]+://#i', $file)) {
        return false;
    }
    // fileowner() warns and returns false for a missing path, so check first.
    if (!file_exists($file)) {
        return false;
    }
    $owner = fileowner($file);
    // Strict comparison: with "==" a failed lookup (false) would equal uid 0.
    return $owner !== false && $owner === getmyuid();
}
/**
 * SAX start-tag callback: print the element name and its attributes as
 * colour-coded HTML.
 *
 * @param resource $parser  Parser invoking the callback (not used here).
 * @param string   $name    Element name as reported by the parser.
 * @param array    $attribs Attribute name => value pairs.
 * @return void
 */
function startElement($parser, $name, $attribs)
{
    // NOTE(review): the leading "<" and trailing ">" are written raw, as in
    // the original post; emit &lt; / &gt; if they must show up on the page.
    echo "<<font color=\"#0000cc\">$name</font>";
    // foreach replaces while(list() = each()); each() was removed in PHP 8.
    foreach ($attribs as $k => $v) {
        // Attribute values come from the remote feed, so escape them.
        echo " <font color=\"#009900\">$k</font>=\"<font color=\"#990000\">"
            . htmlspecialchars($v) . "</font>\"";
    }
    echo ">";
}
/**
 * SAX end-tag callback: print the closing tag as colour-coded HTML.
 *
 * @param resource $parser Parser invoking the callback (not used here).
 * @param string   $name   Element name as reported by the parser.
 * @return void
 */
function endElement($parser, $name)
{
    // The closing </font> was garbled to "</f ont>", which left the <font>
    // element unclosed in the generated page.
    echo "</<font color=\"#0000cc\">$name</font>>";
}
/**
 * SAX character-data callback: print element text in bold.
 *
 * @param resource $parser Parser invoking the callback (not used here).
 * @param string   $data   Text chunk delivered by the parser.
 * @return void
 */
function characterData($parser, $data)
{
    // The text is review content fetched from a remote feed; escape it so
    // that "<", ">" or "&" in a review cannot inject markup into the page.
    echo "<b>" . htmlspecialchars($data) . "</b>";
}
/**
 * Processing-instruction callback.
 *
 * Only instructions whose target is "php" (any letter case) are handled:
 * the code is executed when the source document passes trustedFile(),
 * otherwise it is printed, HTML-escaped, as "Untrusted PHP code".
 *
 * @param resource $parser Parser invoking the callback; key into $parser_file.
 * @param string   $target Processing-instruction target.
 * @param string   $data   Processing-instruction content.
 * @return void
 */
function PIHandler($parser, $target, $data)
{
    if (strtolower($target) != "php") {
        return;
    }
    global $parser_file;
    if (!trustedFile($parser_file[$parser])) {
        printf("Untrusted PHP code: <i>%s</i>", htmlspecialchars($data));
        return;
    }
    // NOTE(review): eval() runs code taken from the parsed document; it is
    // gated solely by trustedFile().
    eval($data);
}
/**
 * Default callback: print any data not claimed by another handler.
 *
 * Text that starts with "&" and ends with ";" is shown in magenta; all
 * other text is shown in a smaller font. The text is HTML-escaped.
 *
 * @param resource $parser Parser invoking the callback (not used here).
 * @param string   $data   Raw text handed over by the parser.
 * @return void
 */
function defaultHandler($parser, $data)
{
    $looks_like_entity = substr($data, 0, 1) == "&" && substr($data, -1, 1) == ";";
    $format = $looks_like_entity
        ? '<font color="#aa00aa">%s</font>'
        : '<font size="-1">%s</font>';
    printf($format, htmlspecialchars($data));
}
/**
 * External-entity callback: parse the referenced document with its own
 * parser created by new_xml_parser().
 *
 * @param resource $parser          Parser that met the reference (not used).
 * @param string   $openEntityNames Entity name(s); used in messages only.
 * @param string   $base            Not used.
 * @param string   $systemId        Location of the entity to open and parse.
 * @param string   $publicId        Not used.
 * @return bool True when the entity was opened and parsed without error.
 */
function externalEntityRefHandler($parser, $openEntityNames, $base, $systemId,
                                  $publicId) {
    if (!$systemId) {
        return false;
    }
    $opened = new_xml_parser($systemId);
    if (!$opened) {
        printf("Could not open entity %s at %s\n", $openEntityNames,
               $systemId);
        return false;
    }
    list($entity_parser, $fp) = $opened;
    $ok = true;
    while ($data = fread($fp, 4096)) {
        if (!xml_parse($entity_parser, $data, feof($fp))) {
            printf("XML error: %s at line %d while parsing entity %s\n",
                   xml_error_string(xml_get_error_code($entity_parser)),
                   xml_get_current_line_number($entity_parser), $openEntityNames);
            $ok = false;
            break;
        }
    }
    // Release both resources on every path; the file handle used to leak.
    fclose($fp);
    xml_parser_free($entity_parser);
    return $ok;
}
/**
 * Open $file and create a SAX parser wired to this script's handlers.
 *
 * The parser/file association is recorded in the global $parser_file so
 * that PIHandler() can look up which document a parser is reading.
 *
 * @param string $file Path or URL of the XML document.
 * @return array|false array($xml_parser, $fp) on success, false when the
 *                     file cannot be opened.
 */
function new_xml_parser($file)
{
    global $parser_file;
    // Open the input first: if this fails no parser has been created yet,
    // so nothing is left behind unfreed.
    if (!($fp = @fopen($file, "r"))) {
        return false;
    }
    $xml_parser = xml_parser_create();
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");
    xml_set_processing_instruction_handler($xml_parser, "PIHandler");
    xml_set_default_handler($xml_parser, "defaultHandler");
    xml_set_external_entity_ref_handler($xml_parser, "externalEntityRefHandler");
    if (!is_array($parser_file)) {
        settype($parser_file, "array");
    }
    $parser_file[$xml_parser] = $file;
    return array($xml_parser, $fp);
}
// Open the feed and build the parser; new_xml_parser() returns false when
// the input cannot be opened.
if (!(list($xml_parser, $fp) = new_xml_parser($file))) {
    die("could not open XML input");
}
echo "<pre>";
// Feed the document to the parser in 4 KB chunks; feof() marks the last one.
while ($data = fread($fp, 4096)) {
    if (!xml_parse($xml_parser, $data, feof($fp))) {
        die(sprintf("XML error: %s at line %d\n",
                    xml_error_string(xml_get_error_code($xml_parser)),
                    xml_get_current_line_number($xml_parser)));
    }
}
// The input handle is no longer needed once everything has been read.
fclose($fp);
echo "</pre>";
echo "parse complete\n";
xml_parser_free($xml_parser);
?>
<?php
$file = "http://www.ergoindemand.com/pwr/7gdt43ap/rawdata/review_data_complete.xml";
function trustedFile($file)
{
// only trust local files owned by ourselves
if (!eregi("^([a-z]+)://", $file)
&& fileowner($file) == getmyuid()) {
return true;
}
return false;
}
function startElement($parser, $name, $attribs)
{
echo "<<font color=\"#0000cc\">$name</f
if (sizeof($attribs)) {
while (list($k, $v) = each($attribs)) {
echo " <font color=\"#009900\">$k</font
color=\"#990000\">$v</font
}
}
echo ">";
}
function endElement($parser, $name)
{
echo "</<font color=\"#0000cc\">$name</f
}
function characterData($parser, $data)
{
echo "<b>$data</b>";
}
function PIHandler($parser, $target, $data)
{
switch (strtolower($target)) {
case "php":
global $parser_file;
// If the parsed document is "trusted", we say it is safe
// to execute PHP code inside it. If not, display the code
// instead.
if (trustedFile($parser_file[
eval($data);
} else {
printf("Untrusted PHP code: <i>%s</i>",
htmlspecialchars($data));
}
break;
}
}
function defaultHandler($parser, $data)
{
if (substr($data, 0, 1) == "&" && substr($data, -1, 1) == ";") {
printf('<font color="#aa00aa">%s</font>'
htmlspecialchars($data));
} else {
printf('<font size="-1">%s</font>',
htmlspecialchars($data));
}
}
function externalEntityRefHandler($
$publicId) {
if ($systemId) {
if (!list($parser, $fp) = new_xml_parser($systemId))
printf("Could not open entity %s at %s\n", $openEntityNames,
$systemId);
return false;
}
while ($data = fread($fp, 4096)) {
if (!xml_parse($parser, $data, feof($fp))) {
printf("XML error: %s at line %d while parsing entity %s\n",
xml_error_string(xml_get_e
xml_get_current_line_numbe
xml_parser_free($parser);
return false;
}
}
xml_parser_free($parser);
return true;
}
return false;
}
function new_xml_parser($file)
{
global $parser_file;
$xml_parser = xml_parser_create();
xml_parser_set_option($xml
xml_set_element_handler($x
xml_set_character_data_han
xml_set_processing_instruc
xml_set_default_handler($x
xml_set_external_entity_re
if (!($fp = @fopen($file, "r"))) {
return false;
}
if (!is_array($parser_file)) {
settype($parser_file, "array");
}
$parser_file[$xml_parser] = $file;
return array($xml_parser, $fp);
}
if (!(list($xml_parser, $fp) = new_xml_parser($file))) {
die("could not open XML input");
}
echo "<pre>";
while ($data = fread($fp, 4096)) {
if (!xml_parse($xml_parser, $data, feof($fp))) {
die(sprintf("XML error: %s at line %d\n",
xml_error_string(xml_get_e
xml_get_current_line_numbe
}
}
echo "</pre>";
echo "parse complete\n";
xml_parser_free($xml_parse
?>
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
Works perfectly!!! Thanks! No special reason for the double angle brackets. They are going to be removed as I tweak the script so it only shows up in the html code for seo purposes. Thanks a million!!
I think I see what you were trying to do with the < and > characters now. If you want those to show up on your web page, you should use &lt; and &gt;, respectively.
Here's another version of the script with that fixed.
Here's another version of the script with that fixed.
<?php
$file = "http://www.ergoindemand.com/pwr/7gdt43ap/rawdata/review_data_complete.xml";
/**
 * Tell whether a parsed document is trusted enough to run its embedded PHP.
 *
 * Trusted means: the name is a local path (no "scheme://" prefix), the file
 * exists, and its owner is the uid returned by getmyuid().
 *
 * @param string $file Path or URL of the document being parsed.
 * @return bool True only for an existing local file owned by us.
 */
function trustedFile($file)
{
    // eregi() is gone in PHP 7+; preg_match() with the "i" flag is the
    // equivalent case-insensitive test for a URL scheme.
    $is_url = preg_match('#^[a-z]+://#i', $file);
    if ($is_url || !file_exists($file)) {
        return false;
    }
    $owner = fileowner($file);
    // "===" so that a failed fileowner() (false) can never match uid 0.
    return $owner !== false && $owner === getmyuid();
}
/**
 * SAX start-tag callback: show the opening tag, with its attributes, as
 * colour-coded text on an HTML page.
 *
 * @param resource $parser  Parser invoking the callback (not used here).
 * @param string   $name    Element name as reported by the parser.
 * @param array    $attribs Attribute name => value pairs.
 * @return void
 */
function startElement($parser, $name, $attribs)
{
    // &lt; / &gt; make the angle brackets visible in the browser instead of
    // being read as markup.
    echo "&lt;<font color=\"#0000cc\">$name</font>";
    // foreach replaces while(list() = each()); each() was removed in PHP 8.
    foreach ($attribs as $k => $v) {
        // Attribute values come from the remote feed, so escape them.
        echo " <font color=\"#009900\">$k</font>=\"<font color=\"#990000\">"
            . htmlspecialchars($v) . "</font>\"";
    }
    echo "&gt;";
}
/**
 * SAX end-tag callback: show the closing tag as colour-coded text on an
 * HTML page.
 *
 * @param resource $parser Parser invoking the callback (not used here).
 * @param string   $name   Element name as reported by the parser.
 * @return void
 */
function endElement($parser, $name)
{
    // Brackets are written as entities so the browser displays them.
    echo "&lt;/<font color=\"#0000cc\">$name</font>&gt;";
}
/**
 * SAX character-data callback: print element text in bold.
 *
 * @param resource $parser Parser invoking the callback (not used here).
 * @param string   $data   Text chunk delivered by the parser.
 * @return void
 */
function characterData($parser, $data)
{
    // Review text originates outside this site; escaping keeps any "<", ">"
    // or "&" in it from being interpreted as HTML.
    $safe = htmlspecialchars($data);
    echo "<b>$safe</b>";
}
/**
 * Processing-instruction callback.
 *
 * Instructions with a target other than "php" (compared case-insensitively)
 * are ignored. For "php" instructions the code is run if the document the
 * parser is reading passes trustedFile(); if not, the code is displayed
 * HTML-escaped instead.
 *
 * @param resource $parser Parser invoking the callback; key into $parser_file.
 * @param string   $target Processing-instruction target.
 * @param string   $data   Processing-instruction content.
 * @return void
 */
function PIHandler($parser, $target, $data)
{
    global $parser_file;
    $is_php = (strtolower($target) == "php");
    if ($is_php) {
        $trusted = trustedFile($parser_file[$parser]);
        if ($trusted) {
            // NOTE(review): executes code carried inside the XML document;
            // the only safeguard is the trustedFile() check above.
            eval($data);
        } else {
            printf("Untrusted PHP code: <i>%s</i>", htmlspecialchars($data));
        }
    }
}
/**
 * Default callback for data no other handler takes.
 *
 * Prints the HTML-escaped text wrapped in a <font> tag: magenta when the
 * text begins with "&" and ends with ";", otherwise at size -1.
 *
 * @param resource $parser Parser invoking the callback (not used here).
 * @param string   $data   Raw text handed over by the parser.
 * @return void
 */
function defaultHandler($parser, $data)
{
    $escaped = htmlspecialchars($data);
    $first = substr($data, 0, 1);
    $last = substr($data, -1, 1);
    if (!($first == "&" && $last == ";")) {
        printf('<font size="-1">%s</font>', $escaped);
        return;
    }
    printf('<font color="#aa00aa">%s</font>', $escaped);
}
/**
 * External-entity callback: open the entity named by $systemId and parse it
 * with a separate parser obtained from new_xml_parser().
 *
 * @param resource $parser          Parser that met the reference (not used).
 * @param string   $openEntityNames Entity name(s); used in messages only.
 * @param string   $base            Not used.
 * @param string   $systemId        Location of the entity to open and parse.
 * @param string   $publicId        Not used.
 * @return bool True when the entity was opened and parsed without error,
 *              false otherwise (including an empty $systemId).
 */
function externalEntityRefHandler($parser, $openEntityNames, $base, $systemId,
                                  $publicId) {
    if (!$systemId) {
        return false;
    }
    $opened = new_xml_parser($systemId);
    if (!$opened) {
        printf("Could not open entity %s at %s\n", $openEntityNames,
               $systemId);
        return false;
    }
    list($entity_parser, $fp) = $opened;
    $parsed_ok = true;
    while ($data = fread($fp, 4096)) {
        if (!xml_parse($entity_parser, $data, feof($fp))) {
            printf("XML error: %s at line %d while parsing entity %s\n",
                   xml_error_string(xml_get_error_code($entity_parser)),
                   xml_get_current_line_number($entity_parser), $openEntityNames);
            $parsed_ok = false;
            break;
        }
    }
    // Previously $fp was never closed; free handle and parser on all paths.
    fclose($fp);
    xml_parser_free($entity_parser);
    return $parsed_ok;
}
/**
 * Open $file for reading and create a SAX parser bound to the handlers
 * defined in this script.
 *
 * Also stores $file in the global $parser_file, keyed by the parser, which
 * is where PIHandler() reads the document name from.
 *
 * @param string $file Path or URL of the XML document.
 * @return array|false array($xml_parser, $fp) on success, false when the
 *                     file cannot be opened.
 */
function new_xml_parser($file)
{
    global $parser_file;
    // Try the file before creating the parser so that a failed open does
    // not leave a parser allocated with no one to free it.
    $fp = @fopen($file, "r");
    if (!$fp) {
        return false;
    }
    $xml_parser = xml_parser_create();
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");
    xml_set_processing_instruction_handler($xml_parser, "PIHandler");
    xml_set_default_handler($xml_parser, "defaultHandler");
    xml_set_external_entity_ref_handler($xml_parser, "externalEntityRefHandler");
    if (!is_array($parser_file)) {
        settype($parser_file, "array");
    }
    $parser_file[$xml_parser] = $file;
    return array($xml_parser, $fp);
}
/**
 * Cut the product feed down to its header plus the <product> block(s) whose
 * <pageid> equals $pageid, so that only that product gets parsed.
 *
 * @param string $xml    Complete feed document.
 * @param string $pageid Page id (SKU) to keep. Defaults to the id that used
 *                       to be hard-coded, so one-argument calls still select
 *                       the same product.
 * @return string The first two lines of $xml, every matching <product>
 *                block, and a closing </products> tag.
 */
function apresence_scrub_xml($xml, $pageid = '74-5002JEE11')
{
    // Grab 2 lines from xml header; fall back to '' if they are not found
    // rather than reading an undefined $matches[0].
    $new_xml = preg_match('|^([^$]+$){2}|imsU', $xml, $matches) ? $matches[0] : '';

    // Quote the id so characters such as "-" or "." match literally; "|" is
    // passed because it is the pattern delimiter.
    $quoted_id = preg_quote($pageid, '|');
    preg_match_all('|(<product [^<]+<pageid>' . $quoted_id . '</pageid>.*</product>)|imsU', $xml, $matches);

    // We just want the grouped part: $matches[1] lists the captured
    // <product> blocks and is empty when nothing matched.
    if (!empty($matches[1])) {
        $new_xml .= implode('', $matches[1]);
    }

    // Append the xml footer
    $new_xml .= "\n</products>\n";
    return $new_xml;
}
// Open the feed and build the parser; new_xml_parser() returns false when
// the input cannot be opened.
if (!(list($xml_parser, $fp) = new_xml_parser($file))) {
    die("could not open XML input");
}
// Read all the data into one string. Chunks are compared explicitly so that
// a chunk consisting of "0" does not end the loop early.
$data = '';
while (($this_data = fread($fp, 4096)) !== false && $this_data !== '') {
    $data .= $this_data;
}
// Everything has been read; close the handle instead of leaking it.
fclose($fp);
// Just return the stuff we're interested in
$data = apresence_scrub_xml($data);
echo "<pre>";
// Uncomment the next line to see the scrubbed xml before parsing
// echo htmlspecialchars($data), "\n\n\n--------------------\n\n\n";
if (!xml_parse($xml_parser, $data, true)) {
    die(sprintf("XML error: %s at line %d\n",
                xml_error_string(xml_get_error_code($xml_parser)),
                xml_get_current_line_number($xml_parser)));
}
echo "</pre>";
echo "parse complete\n";
xml_parser_free($xml_parser);
?>
I put &lt; and &gt; in that last comment, but of course the browser is interpreting them and showing the characters instead.
I meant it to show like this:
Whenever you want to show < or > on an HTML page, you should use &lt; and &gt; instead.
I meant it to show like this:
Whenever you want to show < or > on an HTML page, you should use &lt; and &gt; instead.
ASKER
I knew exactly what you meant...and thanks again!
Still not right, let's try it like this:
Whenever you want to show < or > on an HTML page, you should use &lt; and &gt; instead.
ASKER
Last issue I have. I need to substitute the sku, "74-5002JEE11" with the variable that represents the sku in mysql since the sku needs to be dynamic for all our various products. The variable called is $products_model. I'm trying to figure out how to incorporate that in your function. Any suggestions?
ASKER
This is what I've tried adding to the function so that the product sku is used within the <pageid>...</pageid> elements in the preg_match_all call.
$products_model = $product_info['products_model'];
preg_match_all('|(<product [^<]+<pageid>"/^$products_model$/"</pageid>.*</product>)|imsU', $xml, $matches);
$products_model calls the sku... but I don't think I'm calling it correctly. I just basically need to echo or return the value of $products_model within the string. What am I doing wrong?
$products_model = $product_info['products_mo
preg_match_all('|(<product
$products_model calls the sku... but I don't think I'm calling it correctly. I just basically need to echo or return the value of $products_model within the string. What am I doing wrong?
Since this is unrelated to the original question, please open up a new question and I'll be happy to answer it for you.
ASKER
ok...just posted new question
Testing (I saved your sample input XML file into test10.in):
root@beta:~/exex/test10 $ php test10.php <test10.in
------
<?xml version="1.0" encoding="UTF-8"?>
<name>HON Park Avenue 5000 Series Mid-Back Managers Chair, Henna Cherry/Black Vinyl</name>
<smallstarimagelocation>pw
<largestarimagelocation>pw
<newestreviewdate>2010-08-
<oldestreviewdate>2010-08-
<averageoverallrating>5</a
<average_rating_decimal>5<
<fullreviews>2</fullreview
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Computer chair</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="2">Midrange shopper</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="2">Business</tag>
</taggroup>
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="2">Attractive</tag>
<tag isuseradded="false" count="2">Comfortable</tag
<tag isuseradded="false" count="2">Durable</tag>
<tag isuseradded="false" count="1">Easy to assemble</tag>
<tag isuseradded="false" count="2">Ergonomic</tag>
<tag isuseradded="false" count="2">Good lumbar support</tag>
<tag isuseradded="false" count="2">Rolls smoothly</tag>
</taggroup>
<bottom_line_yes_votes>2</
<bottom_line_no_votes>0</b
<customerimages>false</cus
<customervideos>false</cus
<inlinefiles>
<inlinefile reviewpage="1">pwr/7gdt43a
</inlinefiles>
<reviews>
<fullreview>
<id>xxxxx</id>
<merchant_review_id>xxxxx<
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-24</c
<helpfulvotes>0</helpfulvo
<nothelpfulvotes>0</nothel
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<headline>test</headline>
<overallrating>5</overallr
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Sturdy</tag>
<tag isuseradded="false" count="1">Rolls Smoothly</tag>
<tag isuseradded="false" count="1">Good Lumbar Support</tag>
<tag isuseradded="false" count="1">Comfortable</tag
<tag isuseradded="false" count="1">Ergonomic</tag>
<tag isuseradded="false" count="1">Attractive Design</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange Shopper</tag>
</taggroup>
<taggroup key="bestuses" name="Best Uses">
<tag isuseradded="false" count="1">Computer Chair</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<bottom_line>recommended</
<comments>test</comments>
<nickname>carol</nickname>
<location>oregon</location
<email_address_from_user>x
<site_id>1</site_id>
</fullreview>
<fullreview>
<id>xxxxx</id>
<merchant_review_id>xxxxx<
<merchantuserid/>
<status>Approved</status>
<createddate>2010-08-30</c
<helpfulvotes>0</helpfulvo
<nothelpfulvotes>0</nothel
<source>web</source>
<confirmstatusgroup>
<confirmstatus>Verified Reviewer</confirmstatus>
</confirmstatusgroup>
<headline>An awesome chair.</headline>
<overallrating>5</overallr
<taggroup key="pros" name="Pros">
<tag isuseradded="false" count="1">Ergonomic</tag>
<tag isuseradded="false" count="1">Comfortable</tag
<tag isuseradded="false" count="1">Good Lumbar Support</tag>
<tag isuseradded="false" count="1">Easy To Assemble</tag>
<tag isuseradded="false" count="1">Rolls Smoothly</tag>
<tag isuseradded="false" count="1">Attractive Design</tag>
<tag isuseradded="false" count="1">Sturdy</tag>
</taggroup>
<taggroup key="describeyourself" name="Describe Yourself">
<tag isuseradded="false" count="1">Midrange Shopper</tag>
</taggroup>
<taggroup key="primaryuse" name="Primary use">
<tag isuseradded="false" count="1">Business</tag>
</taggroup>
<bottom_line>recommended</
<comments>I bought this chair for my office and love it. I could sit in it all day.</comments>
<nickname>Gerry</nickname>
<location>Grants Pass, OR</location>
<email_address_from_user>x
<site_id>1</site_id>
</fullreview>
</reviews>
root@beta:~/exex/test10 $
Open in new window