Solved

remove carriage return in xml file

Posted on 2004-10-15
4
726 Views
Last Modified: 2012-05-05
When I open my XML file in Notepad, it looks like this:

  <CM_EVALUATIONS>
        <EMPLID>
            L06910897
        </EMPLID>
        <REVIEW_DT>
            2004/09/15
        </REVIEW_DT>
    </CM_EVALUATIONS>

instead of
    <CM_EVALUATIONS>
        <EMPLID>L06910897</EMPLID>
        <REVIEW_DT>2004/09/15</REVIEW_DT>
    </CM_EVALUATIONS>


I need to remove the carriage returns from inside the inner tags.

Here's the code:
===================================
public final class XMLBuilderImpl implements XMLBuilder {
    private final Properties props;
    private BufferedWriter out;
    private boolean headerWritten;
    private String encoding;    
    private final String indentStr;
    private final boolean useLineFeed;
    private final boolean quotedAttributeValues;  
    private int openedTags;
    private Stack openedTagsStack;    
   
    XMLBuilderImpl(Properties props, OutputStream out) {
        this.props = props;
        encoding = props.getProperty(PropertyNames.ENCODING,"UTF-8");        
        try {
            this.out = new BufferedWriter(new OutputStreamWriter(out,encoding));
        } catch(UnsupportedEncodingException e) {
            try {
                encoding = "UTF-8";
                this.out = new BufferedWriter(new OutputStreamWriter(out,encoding));
            } catch(UnsupportedEncodingException ex) {}
        }
        int indent = Integer.parseInt(props.getProperty(PropertyNames.INDENT_SIZE,"4"));
        StringBuffer indentStrBuf = new StringBuffer();
        for(int i=0;i<indent;i++) indentStrBuf.append(" ");
        indentStr = indentStrBuf.toString();
        useLineFeed = Boolean.valueOf(props.getProperty(PropertyNames.LINE_FEED,"true")).booleanValue();
        // if true, quotes are used, otherwise apostrophies
        quotedAttributeValues = Boolean.valueOf(props.getProperty(PropertyNames.QUOTED_ATTR_VALUE,"true")).booleanValue();
        openedTagsStack = new Stack();                      
    }
   
    private void writeHeader() throws IOException {
        if(!headerWritten) {
            out.write("<?xml version=\"1.0\" encoding=\""+encoding+"\" ?>"+getLineFeed());
            headerWritten = true;
        }
    }
   
    public void startTag(String qName) throws IOException {
        startTag(qName,null,null);  
    }
   
    public void startTag(
        String qName,
        String[] attrQNames,
        String[] attrValues)
        throws IOException {
       
        writeHeader();
        openedTags++;
        openedTagsStack.push(qName);
        StringBuffer sb = new StringBuffer();
        sb.append("<"+qName);
        if(attrQNames != null) {
            for(int i=0;i<attrQNames.length;i++) {
                sb.append(" "+attrQNames[i] + (quotedAttributeValues? "=\"":"=\'"));
                sb.append(filterAttributeContent(attrValues[i]));
                sb.append(quotedAttributeValues? "\"":"\'");
            }                    
        }        
        sb.append(">");
        out.write(getOffset() + sb.toString() + getLineFeed());                      
    }
   
    public void putEmptyElementTag(String qName) throws IOException {
        putEmptyElementTag(qName,null,null);
    }
   
    public void putEmptyElementTag(
        String qName,
        String[] attrQNames,
        String[] attrValues)
        throws IOException {
               
        writeHeader();
        openedTags++;
        StringBuffer sb = new StringBuffer();
        sb.append("<"+qName);        
        if(attrQNames != null) {
            for(int i=0;i<attrQNames.length;i++) {
                sb.append(" "+attrQNames[i] + (quotedAttributeValues? "=\"":"=\'"));
                sb.append(filterAttributeContent(attrValues[i]));
                sb.append(quotedAttributeValues? "\"":"\'");
            }                  
        }
        sb.append("/>");
        out.write(getOffset() + sb.toString() + getLineFeed());
        openedTags--;      
    }

    public void endTag(String qName)
        throws NonTerminatedTagException, UnexpectedTagTerminationException, IOException {
       
        String expectedName = (String)openedTagsStack.peek();
        if(!qName.equals(expectedName)) {            
            if(openedTagsStack.contains(qName)) { // the tag was not closed properly
                throw new NonTerminatedTagException("Tag <"+qName+"> was not properly terminated");
            } else {
                throw new UnexpectedTagTerminationException("Trying to close not opened <"+qName+"> tag");  
            }
        }
        out.write(getOffset() + "</"+qName+">" + getLineFeed());
        openedTagsStack.pop();        
        openedTags--;
       
    }

    public void putText(String text) throws IOException {
        String _offset = getOffset() + (useLineFeed? indentStr:"");
        out.write(_offset + filterContent(text) + getLineFeed());
    }
   
    public void putCDATA(String data) throws InvalidCDATACharsException, IOException {
          if (data.indexOf("]]>") != -1 )
                throw new InvalidCDATACharsException(data);
                
        out.write(
            getOffset()
                + (useLineFeed ? indentStr : "")
                + "<![CDATA["
                + data
                + "]]>"
                + getLineFeed());
    }

    public void flush() throws IOException {
        out.flush();
    }

    public void close() throws NonTerminatedTagException, IOException {        
        if(openedTagsStack.size()>0) {
            throw new NonTerminatedTagException("Trying to close with unterminated tag(s)");
        }
        out.flush();
        out.close();
    }
           
    private String filterContent(String content) {
        String res = findAndReplace(content,"&","&amp;");
        res = findAndReplace(res,"<","&lt;");
        return res;
    }
   
    private String filterAttributeContent(String content) {
        String res = findAndReplace(content,"&","&amp;");
        res = findAndReplace(res,"<","&lt;");
        res = findAndReplace(res,"\"","&quot;");
        res = findAndReplace(res,"\'","&apos;");
        return res;
    }
   
    /**
     * If either of the arguments is null, the unmodified input string is returned.
     * If input or findWhat is an empty string, the unmodified input string is returned
     */
    private String findAndReplace(String input, String findWhat, String replaceWith) {
        if(input==null || findWhat==null || replaceWith==null ||
           "".equals(input) || "".equals(findWhat)) {
               return input;
        }
        StringBuffer res = new StringBuffer();
        StringTokenizer st = new StringTokenizer(input, findWhat, true);
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            if (findWhat.equals(token)) {
                res.append(replaceWith);
            } else {
                res.append(token);
            }
        }
        return res.toString();
    }        
   
    private String getOffset() {
        String res = "";
        if(useLineFeed && openedTags>1) {
            for(int i=0; i<openedTags-1; i++) {
                res += indentStr;
            }
        }
        return res;
    }
   
    private String getLineFeed() {
        if(useLineFeed) {
            return "\r\n";
        } else {
            return "";
        }
    }
}

0
Comment
Question by:royalcyber
[X]
Welcome to Experts Exchange

Add your voice to the tech community where 5M+ people just like you are talking about what matters.

  • Help others & share knowledge
  • Earn cash & points
  • Learn & ask questions
4 Comments
 
LVL 15

Accepted Solution

by:
dualsoul earned 250 total points
ID: 12323154
Hm... I can't understand why you need to remove the whitespace inside the tags, because it has absolutely no meaning. Whitespace is subject to normalization by the parser, so there is no difference between:

<?xml version="1.0" encoding="UTF-8"?>
<CM_EVALUATIONS>
      <EMPLID>
            L06910897
     </EMPLID>
      <REVIEW_DT>
            2004/09/15
     </REVIEW_DT>
</CM_EVALUATIONS>

and

<?xml version="1.0" encoding="UTF-8"?>
<CM_EVALUATIONS>
      <EMPLID>L06910897</EMPLID>
      <REVIEW_DT>2004/09/15</REVIEW_DT>
</CM_EVALUATIONS>


But if you really want to do that, I can't understand why you wrote so much Java code for it; it can be done with a very simple XSLT stylesheet:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copies the input document as-is, except that the value of every text node
  is passed through normalize-space(), which strips leading and trailing
  whitespace and collapses internal whitespace runs to a single space.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
      <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
      
      <!-- Identity rule: copy each attribute and node, then process its attributes and children. -->
      <xsl:template match="@* | node()">
            <xsl:copy>
                  <!-- text() is already covered by node(); listing it again is harmless. -->
                  <xsl:apply-templates select="@* | node() | text()" />
            </xsl:copy>            
      </xsl:template>
      <!--
        Text rule: emit the whitespace-normalized value instead of copying the node.
        NOTE(review): text nodes also match node() in the rule above, and both
        patterns have the same default priority. XSLT 1.0 processors that recover
        from this conflict pick the rule occurring last in the stylesheet (this one);
        consider adding an explicit priority attribute to make that unambiguous.
      -->
      <xsl:template match="text()">
            <xsl:value-of select="normalize-space(.)" />
      </xsl:template>
</xsl:stylesheet>
0
 
LVL 26

Assisted Solution

by:rdcpro
rdcpro earned 250 total points
ID: 12324481
I'm thinking if whitespace is an issue because of digital signing, you may actually want to canonicalize your XML...

http://xmlhack.com/read.php?item=1122
http://xmlhack.com/read.php?item=1190
http://www.w3.org/TR/2001/REC-xml-c14n-20010315

Regards,
Mike Sharp
0

Featured Post

How Do You Stack Up Against Your Peers?

With today’s modern enterprise so dependent on digital infrastructures, the impact of major incidents has increased dramatically. Grab the report now to gain insight into how your organization ranks against your peers and learn best-in-class strategies to resolve incidents.

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

Suggested Solutions

Title # Comments Views Activity
PowerShell script to remove string in xml file 3 41
VB.Net. Reading xml value 6 38
MS Access VBA Object Variable Not Set. Can't figure this out. 11 45
C# XML Get Values 4 35
Browsing the questions asked to the Experts of this forum, you will be amazed to see how many times people are headaching about monster regular expressions (regex) to select that specific part of some HTML or XML file they want to extract. The examp…
The Confluence of Individual Knowledge and the Collective Intelligence At this writing (summer 2013) the term API (http://dictionary.reference.com/browse/API?s=t) has made its way into the popular lexicon of the English language.  A few years ago, …
Email security requires an ever evolving service that stays up to date with counter-evolving threats. The Email Laundry perform Research and Development to ensure their email security service evolves faster than cyber criminals. We apply our Threat…
Exchange organizations may use the Journaling Agent of the Transport Service to archive messages going through Exchange. However, if the Transport Service is integrated with some email content management application (such as an antispam), the admini…

749 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question