• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 562
  • Last Modified:

Xpath Java query problem(urgent)

I am unable to make a query to select information from this schema/XML
from every record i need to pull text for
<ims1_2_1:title>
</ims1_2_1:description> and
<ims1_2_1:keyword>
i am looking forward to have some thing like this for each "record" There are two in this doc in the out put I am using j2SE 5.0 no other apis
If some can guide me in forming the XPATH queries will be a big day saver for me
-----

Title: Digital Channels
Description:An introduction to the components and structure of a digital channel.
Keywords:binary symmetric channel, BPSK..........................................etc



      <OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/                              http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
<responseDate>2007-07-23T16:50:52Z</responseDate>
<request metadataPrefix="ims1_2_1" verb="ListRecords">http://cooker.cnx.rice.edu:8080/content/OAI</request>

      <ListRecords>

      <record>

      <header>
<identifier>oai:cooker.cnx.rice.edu:8080:m0102</identifier>
<datestamp>2005-04-13T21:39:12Z</datestamp>
</header>

      <metadata>

      <ims1_2_1:lom xsi:schemaLocation="http://www.imsglobal.org/xsd/imsmd_v1p2 http://www.imsglobal.org/xsd/imsmd_v1p2p4.xsd">

      <ims1_2_1:general>

      <ims1_2_1:title>
<ims1_2_1:langstring xml:lang="en">Digital Channels</ims1_2_1:langstring>
</ims1_2_1:title>
<ims1_2_1:language>en</ims1_2_1:language>

      <ims1_2_1:description>

      <ims1_2_1:langstring xml:lang="en">
An introduction to the components and structure of a digital channel.
</ims1_2_1:langstring>
</ims1_2_1:description>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">binary symmetric channel</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">BPSK</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">digital communication</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">digital communication receiver</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">digital communication systems</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">error probability</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">FSK</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">fundamental model of communication</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">signal set</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">transition diagrams</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">transmission bandwidth</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:structure>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Mixed</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:structure>

      <ims1_2_1:aggregationlevel>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">2</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:aggregationlevel>
</ims1_2_1:general>

      <ims1_2_1:lifecycle>

      <ims1_2_1:version>
<ims1_2_1:langstring xml:lang="x-none">2.13</ims1_2_1:langstring>
</ims1_2_1:version>

      <ims1_2_1:contribute>

      <ims1_2_1:role>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Author</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:role>

      <ims1_2_1:centity>

      <ims1_2_1:vcard>

BEGIN:vCard
FN:Don Johnson
EMAIL;INTERNET:dhj@rice.edu
END:vCard
            
</ims1_2_1:vcard>
</ims1_2_1:centity>

      <ims1_2_1:date>
<ims1_2_1:datetime>2005/04/13 16:39:12 GMT-5</ims1_2_1:datetime>
</ims1_2_1:date>
</ims1_2_1:contribute>
</ims1_2_1:lifecycle>

      <ims1_2_1:technical>
<ims1_2_1:format>text/html</ims1_2_1:format>

      <ims1_2_1:location type="URI">
http://cooker.cnx.rice.edu:8080/content/m0102/2.13/
</ims1_2_1:location>

      <ims1_2_1:requirement>

      <ims1_2_1:type>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Browser</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:type>

      <ims1_2_1:name>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Any</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:name>
</ims1_2_1:requirement>
</ims1_2_1:technical>

      <ims1_2_1:rights>

      <ims1_2_1:copyrightandotherrestrictions>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">yes</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:copyrightandotherrestrictions>

      <ims1_2_1:description>
<ims1_2_1:langstring xml:lang="en">http://creativecommons.org/licenses/by/1.0</ims1_2_1:langstring>
</ims1_2_1:description>
</ims1_2_1:rights>

      <ims1_2_1:classification>

      <ims1_2_1:purpose>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Discipline</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:purpose>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">('Science and Technology',)</ims1_2_1:langstring>
</ims1_2_1:keyword>
</ims1_2_1:classification>
</ims1_2_1:lom>
</metadata>
</record>

      <record>

      <header>
<identifier>oai:cooker.cnx.rice.edu:8080:m10657</identifier>
<datestamp>2004-02-25T17:51:54Z</datestamp>
</header>

      <metadata>

      <ims1_2_1:lom xsi:schemaLocation="http://www.imsglobal.org/xsd/imsmd_v1p2 http://www.imsglobal.org/xsd/imsmd_v1p2p4.xsd">

      <ims1_2_1:general>

      <ims1_2_1:title>
<ims1_2_1:langstring xml:lang="en">Communications: Using Direct Digital Synthesis</ims1_2_1:langstring>
</ims1_2_1:title>
<ims1_2_1:language>en</ims1_2_1:language>

      <ims1_2_1:description>

      <ims1_2_1:langstring xml:lang="en">
This module introduces Direct Digital Synthesis with prepared code that transmits arbitrary input as an FM radio signal.  The module then explains how to program the DDS hardware and concludes with a simple Frequency Shift Keying exercise.
</ims1_2_1:langstring>
</ims1_2_1:description>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">communications</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">digital signal processing</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">direct digital synthesis</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">transmitter</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">wireless</ims1_2_1:langstring>
</ims1_2_1:keyword>

      <ims1_2_1:structure>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Mixed</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:structure>

      <ims1_2_1:aggregationlevel>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">2</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:aggregationlevel>
</ims1_2_1:general>

      <ims1_2_1:lifecycle>

      <ims1_2_1:version>
<ims1_2_1:langstring xml:lang="x-none">2.5</ims1_2_1:langstring>
</ims1_2_1:version>

      <ims1_2_1:contribute>

      <ims1_2_1:role>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Author</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:role>

      <ims1_2_1:centity>

      <ims1_2_1:vcard>

BEGIN:vCard
FN:Matthew Berry
EMAIL;INTERNET:mjberry@uiuc.edu
END:vCard
            
</ims1_2_1:vcard>
</ims1_2_1:centity>

      <ims1_2_1:date>
<ims1_2_1:datetime>2004/02/25 11:51:54 US/Central</ims1_2_1:datetime>
</ims1_2_1:date>
</ims1_2_1:contribute>
</ims1_2_1:lifecycle>

      <ims1_2_1:technical>
<ims1_2_1:format>text/html</ims1_2_1:format>

      <ims1_2_1:location type="URI">
http://cooker.cnx.rice.edu:8080/content/m10657/2.5/
</ims1_2_1:location>

      <ims1_2_1:requirement>

      <ims1_2_1:type>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Browser</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:type>

      <ims1_2_1:name>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Any</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:name>
</ims1_2_1:requirement>
</ims1_2_1:technical>

      <ims1_2_1:rights>

      <ims1_2_1:copyrightandotherrestrictions>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">yes</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:copyrightandotherrestrictions>

      <ims1_2_1:description>
<ims1_2_1:langstring xml:lang="en">http://creativecommons.org/licenses/by/1.0</ims1_2_1:langstring>
</ims1_2_1:description>
</ims1_2_1:rights>

      <ims1_2_1:classification>

      <ims1_2_1:purpose>

      <ims1_2_1:source>
<ims1_2_1:langstring xml:lang="x-none">LOMv1.0</ims1_2_1:langstring>
</ims1_2_1:source>

      <ims1_2_1:value>
<ims1_2_1:langstring xml:lang="x-none">Discipline</ims1_2_1:langstring>
</ims1_2_1:value>
</ims1_2_1:purpose>

      <ims1_2_1:keyword>
<ims1_2_1:langstring xml:lang="en">('Science and Technology',)</ims1_2_1:langstring>
</ims1_2_1:keyword>
</ims1_2_1:classification>
</ims1_2_1:lom>
</metadata>
</record>
</ListRecords>
</OAI-PMH
0
mostenscer
Asked:
mostenscer
  • 9
  • 8
  • 4
  • +1
1 Solution
 
CEHJCommented:
>>
<ims1_2_1:title>
</ims1_2_1:description> and
<ims1_2_1:keyword>
>>

None of those is a text node according to your xml. Each is nested in another
0
 
mostenscerAuthor Commented:
<ims1_2_1:title>
<ims1_2_1:langstring xml:lang="en">Digital Channels</ims1_2_1:langstring>
</ims1_2_1:title>
cant we pull title from this part i.e  "Digital Channels"
so are the other stuff like description and keywords
?? i am no XML/XPath exper:(
0
 
CEHJCommented:
>>cant we pull title from this part i.e  "Digital Channels"

That's not title, it's

//ims1_2_1:title/ims1_2_1:langstring
0
What does it mean to be "Always On"?

Is your cloud always on? With an Always On cloud you won't have to worry about downtime for maintenance or software application code updates, ensuring that your bottom line isn't affected.

 
mostenscerAuthor Commented:
let me ho the basic structure of the doc
<OAI-PMH>
<ListRecords>
<record> Here are the elements for title description keywords etc </record>
<record></record>
......
...
</ListRecords>
</OAI-PMH>
0
 
CEHJCommented:
The pattern you need is what i posted
0
 
mostenscerAuthor Commented:
NodeList nodeList = (NodeList) xpath.evaluate("//ims1_2_1:title/ims1_2_1:langstring", doc, XPathConstants.NODESET);
System.out.println(nodeList.getLength()); prints 0 (zero)
0
 
CEHJCommented:
Make sure you've got the namespace ims1_2_1 defined properly in the doc
0
 
mostenscerAuthor Commented:

I am afraid I havent and dont know how to.  here is my code pretty small to look at
-----------------------
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;

public class pratcice {
      static Document doc;
      /**
       * @param args
       */
      public static void main(String[] args)  {
            try{
              DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      
              DocumentBuilder parser = factory.newDocumentBuilder();
             doc = parser.parse("http://depot.cnx.rice.edu:8080/content/OAI?verb=SearchRecords&metadataPrefix=ims1_2_1&query:list=dsp%20and%20C%20language&b_size=-1");
   //  System.out.println(doc.toString());
      
            XPath xpath = XPathFactory.newInstance().newXPath();
            // Evaluate the document against the pattern
      //      NodeList nodeList = (NodeList) xpath.evaluate("//identifier/text()", doc, XPathConstants.NODESET); works
            
            NodeList nodeList = (NodeList) xpath.evaluate("//ims1_2_1:title/ims1_2_1:langstring", doc, XPathConstants.NODESET);
            
            System.out.println(nodeList.getLength());
            if (nodeList.getLength() > 0) {
                  for(int i = 0;i < nodeList.getLength();i++) {
                        System.out.println(nodeList.item(i).getNodeValue());
                     
                  }
            }      

            } catch (Exception e){e.printStackTrace();}
      }

}
0
 
CEHJCommented:
It's your document you need to change. See

http://en.wikipedia.org/wiki/XML_namespace
0
 
mostenscerAuthor Commented:
its a third party document i have no control over it.  is there any work around? some dummy namespace? or any thing?
0
 
jkmyoungCommented:
Your document persay does not need to change; rather you need a namespace context. Use XPath.setNamespaceContext()
http://java.sun.com/j2se/1.5.0/docs/api/javax/xml/xpath/XPath.html#setNamespaceContext(javax.xml.namespace.NamespaceContext)

For help in implementing a namespace context, look at:
http://www.oreillynet.com/cs/user/view/cs_msg/50304
which presents a fairly simple solution for setting up a namespaceContext class.
0
 
CEHJCommented:
I'm not sure, and i'm not even sure the namespace *is* wrong actually. You could post a link to this q in a more specialized topic area
0
 
CEHJCommented:
They seem to have done *something* to the namespace and even refer to it in the docs in http://www.imsglobal.org/xsd/imsmd_v1p2p4.xsd
0
 
mostenscerAuthor Commented:
I have changed the doc as per  jkmyoung but no help. mind having a look ?
----------------class one for Xpath
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;

public class pratcice {
      static Document doc;
      
      public static void main(String[] args)  {
            try{
              DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
              DocumentBuilder parser = factory.newDocumentBuilder();
             doc = parser.parse("http://depot.cnx.rice.edu:8080/content/OAI?verb=SearchRecords&metadataPrefix=ims1_2_1&query:list=dsp%20and%20C%20language&b_size=-1");
            XPath xpath = XPathFactory.newInstance().newXPath();
            PersonalNamespaceContext      nsctx = new PersonalNamespaceContext("ims1_2_1", "http://cooker.cnx.rice.edu:8080/content/OAI");
            xpath.setNamespaceContext(nsctx);
            // Evaluate the document against the pattern
      //      NodeList nodeList = (NodeList) xpath.evaluate("//identifier/text()", doc, XPathConstants.NODESET);
            //NodeList nodeList = (NodeList) xpath.evaluate("//record/metadata/*", doc, XPathConstants.NODESET);
            NodeList nodeList =
                  (NodeList) xpath.evaluate("ims1_2_1//ims1_2_1:title/ims1_2_1:langstring",
                                                 doc, XPathConstants.NODESET);
            System.out.println(nodeList.getLength());
            if (nodeList.getLength() > 0) {
                  for(int i = 0;i < nodeList.getLength();i++) {
                        System.out.println(nodeList.item(i).getNodeValue());  
                  }
            }      
       } catch (Exception e){e.printStackTrace();}
      }
}
-----------------orielly class-------
package mpkg;


import java.util.Iterator;
import javax.xml.*;
import javax.xml.namespace.NamespaceContext;

public class PersonalNamespaceContext implements NamespaceContext{



       public String uri;

       public String prefix;



       public PersonalNamespaceContext(){}



       public PersonalNamespaceContext(String prefix, String uri){

           this.uri=uri;

           this.prefix=prefix;

          }



       public String getNamespaceURI(String prefix){

         return uri;

       }

       public void setNamespaceURI(String uri){

         this.uri=uri;

       }



       public String getPrefix(String uri){

         return prefix;

       }



       public void setPrefix(String prefix){

         this.prefix=prefix;

       }

       public Iterator getPrefixes(String uri){return null;}



      }
0
 
CEHJCommented:
>> (NodeList) xpath.evaluate("ims1_2_1//ims1_2_1:title/ims1_2_1:langstring",
                                                 doc, XPathConstants.NODESET);

should be

 (NodeList) xpath.evaluate("//ims1_2_1:title/ims1_2_1:langstring",
                                                 doc, XPathConstants.NODESET);
0
 
mostenscerAuthor Commented:
:( no luck
0
 
mostenscerAuthor Commented:
btw what could be more specfic forum? where i need to post this question?
0
 
petiexCommented:
The key is to ignore the namespace altogether in your XPath, and, to get the title strings, you want the node value of the child TextNode of the title node; the nodeValue of any Element is null:
      . . .

      static Document doc;
      /**
       * @param args
       */
      public static void main(String[] args)  {
            try{
              DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
     
              DocumentBuilder parser = factory.newDocumentBuilder();
             doc = parser.parse("http://depot.cnx.rice.edu:8080/content/OAI?verb=SearchRecords&metadataPrefix=ims1_2_1&query:list=dsp%20and%20C%20language&b_size=-1");
            XPath xpath = XPathFactory.newInstance().newXPath();
           
            //NodeList nodeList = (NodeList) xpath.evaluate("//ims1_2_1:title/ims1_2_1:langstring", doc.getDocumentElement(), XPathConstants.NODESET);
            NodeList nodeList = (NodeList) xpath.evaluate("//title/langstring", doc.getDocumentElement(), XPathConstants.NODESET);
            System.out.println(nodeList.getLength());
            if (nodeList.getLength() > 0) {
                  for(int i = 0;i < nodeList.getLength();i++) {
                        System.out.println(nodeList.item(i).getFirstChild().getNodeValue());
                     
                  }
            }      

            } catch (Exception e){e.printStackTrace();}
      }
}

Here is the output:

Compiling 1 source file to C:\Development\Workspace\FlexStar\build\web\WEB-INF\classes
compile-single:
run-main:
12
C62x Assembly Primer 1
Digital Transmitter: Processor Optimization Exercise for Frequency Shift Keying
Digital Transmitter: Processor Optimization Exercise for Frequency Shift Keying
FIR Filtering on the TI TMS320C55x
Fixed Point Arithmetic
Introduction to the IDK
Spectrum Analyzer: Processor Exercise Using C Language with C Introduction
Spectrum Analyzer: Processor Exercise Using C Language with C Introduction
Spectrum Analyzer: Processor Exercise Using C Language with C Introduction
Spectrum Analyzer: VPO/VISTA Optimization Exercise
Three Special Events in the History of Technology for Creating, Organizing, and Sharing Information
Video Processing Part 1: Introductory Exercise
0
 
petiexCommented:
By the way, my addition of getDocumentElement() is unnecessary in the line:

NodeList nodeList = (NodeList) xpath.evaluate("//title/langstring", doc.getDocumentElement(), XPathConstants.NODESET);

This will work just as well:
NodeList nodeList = (NodeList) xpath.evaluate("//title/langstring", doc, XPathConstants.NODESET);
0
 
petiexCommented:
Since I just can't stop picking at it, here is a version of the class that uses the namespace. The main difference is you need to set the DocumentBuilderFactory to namespaceAware.

The following code works against the xml served from the rice.edu uri, but causes the parser to choke on the xml you posted. I'm guessing you copied it from the Firefox display of the xml, which does not render namespace declarations (things like xmlns:ims1_2_1="http://www.imsglobal.org/xsd/imsmd_v1p2" ). To get the full xml document from a browser, you either need to get the view-source from Firefox, or use IE, which does render namespace declarations.

import java.util.Iterator;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;

public class XMLParser {
      static Document doc;
      /**
       * @param args
       */
      public static void main(String[] args)  {
            try{
              DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
              factory.setNamespaceAware(true);
              DocumentBuilder parser = factory.newDocumentBuilder();
             doc = parser.parse("http://depot.cnx.rice.edu:8080/content/OAI?verb=SearchRecords&metadataPrefix=ims1_2_1&query:list=dsp%20and%20C%20language&b_size=-1");
            XPath xpath = XPathFactory.newInstance().newXPath();
            NamespaceContext nsctx = new MyNamespaceContext();
            xpath.setNamespaceContext(nsctx);
            NodeList nodeList = (NodeList) xpath.evaluate("//ims1_2_1:title/ims1_2_1:langstring", doc, XPathConstants.NODESET);
            System.out.println(nodeList.getLength());
            if (nodeList.getLength() > 0) {
                  for(int i = 0;i < nodeList.getLength();i++) {
                        System.out.println(nodeList.item(i).getFirstChild().getNodeValue());                    
                  }
            }      

            } catch (Exception e){e.printStackTrace();}
      }


        static  private class MyNamespaceContext implements NamespaceContext {
        public String getNamespaceURI(String prefix)
        {
            return "http://www.imsglobal.org/xsd/imsmd_v1p2";
        }
       
        public String getPrefix(String namespace)
        {
            return "ims1_2_1";
        }

        public Iterator getPrefixes(String namespace)
        {
            return null;
        }
    }  
}
0
 
petiexCommented:
And here's a version using an xpath string that matches the child text node of each langstring node whose parent is either title, description or keyword
("//ims1_2_1:langstring[parent::ims1_2_1:title or parent::ims1_2_1:description or parent::ims1_2_1:keyword]/text()")

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.util.Iterator;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
public class XMLParser {
    static Document doc;
    public static void main(String[] args) {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(true);
            DocumentBuilder parser = factory.newDocumentBuilder();
            doc = parser.parse( "http://depot.cnx.rice.edu:8080/content/OAI?verb=SearchRecords&metadataPrefix=ims1_2_1&query:list=dsp%20and%20C%20language&b_size=-1");
            XPath xpath = XPathFactory.newInstance().newXPath();
            NamespaceContext nsctx = new MyNamespaceContext();
            xpath.setNamespaceContext(nsctx);
            NodeList nodeList = (NodeList) xpath.evaluate("//ims1_2_1:langstring[parent::ims1_2_1:title or parent::ims1_2_1:description or parent::ims1_2_1:keyword]/text()",
                    doc, XPathConstants.NODESET);
            System.out.println(nodeList.getLength());

            if (nodeList.getLength() > 0) {
                for (int i = 0; i < nodeList.getLength(); i++) {
                    Node text = nodeList.item(i);
                    if (text != null) {
                        System.out.print(text.getParentNode().getParentNode()
                                             .getLocalName() + ": ");
                        System.out.println(text.getNodeValue());
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    static private class MyNamespaceContext implements NamespaceContext {
        public String getNamespaceURI(String prefix) {
            return "http://www.imsglobal.org/xsd/imsmd_v1p2";
        }
        public String getPrefix(String namespace) {
            return "ims1_2_1";
        }
        //this never really gets used.
        public Iterator getPrefixes(String namespace) {
            return null;
        }
    }
}
0

Featured Post

Upgrade your Question Security!

Add Premium security features to your question to ensure its privacy or anonymity. Learn more about your ability to control Question Security today.

  • 9
  • 8
  • 4
  • +1
Tackle projects and never again get stuck behind a technical roadblock.
Join Now