Solved

remove carriage return in xml file

Posted on 2004-10-15
4
722 Views
Last Modified: 2012-05-05
When I open my XML file in Notepad, it looks like this:

  <CM_EVALUATIONS>
        <EMPLID>
            L06910897
        </EMPLID>
        <REVIEW_DT>
            2004/09/15
        </REVIEW_DT>
    </CM_EVALUATIONS>

instead of
    <CM_EVALUATIONS>
        <EMPLID>L06910897</EMPLID>
        <REVIEW_DT>2004/09/15</REVIEW_DT>
    </CM_EVALUATIONS>


I need to remove the carriage returns inside the inner tags, so that each element's text sits on the same line as its start and end tags.

Here's the code:
===================================
public final class XMLBuilderImpl implements XMLBuilder {
    private final Properties props;
    private BufferedWriter out;
    private boolean headerWritten;
    private String encoding;    
    private final String indentStr;
    private final boolean useLineFeed;
    private final boolean quotedAttributeValues;  
    private int openedTags;
    private Stack openedTagsStack;    
   
    XMLBuilderImpl(Properties props, OutputStream out) {
        this.props = props;
        encoding = props.getProperty(PropertyNames.ENCODING,"UTF-8");        
        try {
            this.out = new BufferedWriter(new OutputStreamWriter(out,encoding));
        } catch(UnsupportedEncodingException e) {
            try {
                encoding = "UTF-8";
                this.out = new BufferedWriter(new OutputStreamWriter(out,encoding));
            } catch(UnsupportedEncodingException ex) {}
        }
        int indent = Integer.parseInt(props.getProperty(PropertyNames.INDENT_SIZE,"4"));
        StringBuffer indentStrBuf = new StringBuffer();
        for(int i=0;i<indent;i++) indentStrBuf.append(" ");
        indentStr = indentStrBuf.toString();
        useLineFeed = Boolean.valueOf(props.getProperty(PropertyNames.LINE_FEED,"true")).booleanValue();
        // if true, quotes are used, otherwise apostrophies
        quotedAttributeValues = Boolean.valueOf(props.getProperty(PropertyNames.QUOTED_ATTR_VALUE,"true")).booleanValue();
        openedTagsStack = new Stack();                      
    }
   
    private void writeHeader() throws IOException {
        if(!headerWritten) {
            out.write("<?xml version=\"1.0\" encoding=\""+encoding+"\" ?>"+getLineFeed());
            headerWritten = true;
        }
    }
   
    public void startTag(String qName) throws IOException {
        startTag(qName,null,null);  
    }
   
    public void startTag(
        String qName,
        String[] attrQNames,
        String[] attrValues)
        throws IOException {
       
        writeHeader();
        openedTags++;
        openedTagsStack.push(qName);
        StringBuffer sb = new StringBuffer();
        sb.append("<"+qName);
        if(attrQNames != null) {
            for(int i=0;i<attrQNames.length;i++) {
                sb.append(" "+attrQNames[i] + (quotedAttributeValues? "=\"":"=\'"));
                sb.append(filterAttributeContent(attrValues[i]));
                sb.append(quotedAttributeValues? "\"":"\'");
            }                    
        }        
        sb.append(">");
        out.write(getOffset() + sb.toString() + getLineFeed());                      
    }
   
    public void putEmptyElementTag(String qName) throws IOException {
        putEmptyElementTag(qName,null,null);
    }
   
    public void putEmptyElementTag(
        String qName,
        String[] attrQNames,
        String[] attrValues)
        throws IOException {
               
        writeHeader();
        openedTags++;
        StringBuffer sb = new StringBuffer();
        sb.append("<"+qName);        
        if(attrQNames != null) {
            for(int i=0;i<attrQNames.length;i++) {
                sb.append(" "+attrQNames[i] + (quotedAttributeValues? "=\"":"=\'"));
                sb.append(filterAttributeContent(attrValues[i]));
                sb.append(quotedAttributeValues? "\"":"\'");
            }                  
        }
        sb.append("/>");
        out.write(getOffset() + sb.toString() + getLineFeed());
        openedTags--;      
    }

    public void endTag(String qName)
        throws NonTerminatedTagException, UnexpectedTagTerminationException, IOException {
       
        String expectedName = (String)openedTagsStack.peek();
        if(!qName.equals(expectedName)) {            
            if(openedTagsStack.contains(qName)) { // the tag was not closed properly
                throw new NonTerminatedTagException("Tag <"+qName+"> was not properly terminated");
            } else {
                throw new UnexpectedTagTerminationException("Trying to close not opened <"+qName+"> tag");  
            }
        }
        out.write(getOffset() + "</"+qName+">" + getLineFeed());
        openedTagsStack.pop();        
        openedTags--;
       
    }

    public void putText(String text) throws IOException {
        String _offset = getOffset() + (useLineFeed? indentStr:"");
        out.write(_offset + filterContent(text) + getLineFeed());
    }
   
    public void putCDATA(String data) throws InvalidCDATACharsException, IOException {
          if (data.indexOf("]]>") != -1 )
                throw new InvalidCDATACharsException(data);
                
        out.write(
            getOffset()
                + (useLineFeed ? indentStr : "")
                + "<![CDATA["
                + data
                + "]]>"
                + getLineFeed());
    }

    public void flush() throws IOException {
        out.flush();
    }

    public void close() throws NonTerminatedTagException, IOException {        
        if(openedTagsStack.size()>0) {
            throw new NonTerminatedTagException("Trying to close with unterminated tag(s)");
        }
        out.flush();
        out.close();
    }
           
    private String filterContent(String content) {
        String res = findAndReplace(content,"&","&amp;");
        res = findAndReplace(res,"<","&lt;");
        return res;
    }
   
    private String filterAttributeContent(String content) {
        String res = findAndReplace(content,"&","&amp;");
        res = findAndReplace(res,"<","&lt;");
        res = findAndReplace(res,"\"","&quot;");
        res = findAndReplace(res,"\'","&apos;");
        return res;
    }
   
    /**
     * If either of the arguments is null, the unmodified input string is returned.
     * If input or findWhat is an empty string, the unmodified input string is returned
     */
    private String findAndReplace(String input, String findWhat, String replaceWith) {
        if(input==null || findWhat==null || replaceWith==null ||
           "".equals(input) || "".equals(findWhat)) {
               return input;
        }
        StringBuffer res = new StringBuffer();
        StringTokenizer st = new StringTokenizer(input, findWhat, true);
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            if (findWhat.equals(token)) {
                res.append(replaceWith);
            } else {
                res.append(token);
            }
        }
        return res.toString();
    }        
   
    private String getOffset() {
        String res = "";
        if(useLineFeed && openedTags>1) {
            for(int i=0; i<openedTags-1; i++) {
                res += indentStr;
            }
        }
        return res;
    }
   
    private String getLineFeed() {
        if(useLineFeed) {
            return "\r\n";
        } else {
            return "";
        }
    }
}

0
Comment
Question by:royalcyber
4 Comments
 
LVL 15

Accepted Solution

by:
dualsoul earned 250 total points
ID: 12323154
Hm... I can't understand why you need to remove the whitespace inside the tags, because it has absolutely no meaning. Whitespace is subject to normalization by the parser, so there is no difference between:

<?xml version="1.0" encoding="UTF-8"?>
<CM_EVALUATIONS>
      <EMPLID>
            L06910897
     </EMPLID>
      <REVIEW_DT>
            2004/09/15
     </REVIEW_DT>
</CM_EVALUATIONS>

and

<?xml version="1.0" encoding="UTF-8"?>
<CM_EVALUATIONS>
      <EMPLID>L06910897</EMPLID>
      <REVIEW_DT>2004/09/15</REVIEW_DT>
</CM_EVALUATIONS>


But if you really want to do that, I can't understand why you would write so much Java code for it; it can be done with a very simple XSLT stylesheet:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Copies the input document unchanged except that every text node has its
     leading/trailing whitespace removed and inner whitespace runs collapsed. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
      <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

      <!-- Text nodes: emit the whitespace-normalized value. -->
      <xsl:template match="text()">
            <xsl:value-of select="normalize-space(.)"/>
      </xsl:template>

      <!-- Everything else: shallow-copy the node, then process its attributes
           and children (node() already includes text nodes). -->
      <xsl:template match="@* | node()">
            <xsl:copy>
                  <xsl:apply-templates select="@* | node()"/>
            </xsl:copy>
      </xsl:template>
</xsl:stylesheet>
0
 
LVL 26

Assisted Solution

by:rdcpro
rdcpro earned 250 total points
ID: 12324481
I'm thinking if whitespace is an issue because of digital signing, you may actually want to canonicalize your XML...

http://xmlhack.com/read.php?item=1122
http://xmlhack.com/read.php?item=1190
http://www.w3.org/TR/2001/REC-xml-c14n-20010315

Regards,
Mike Sharp
0

Featured Post

Master Your Team's Linux and Cloud Stack!

The average business loses $13.5M per year to ineffective training (per 1,000 employees). Keep ahead of the competition and combine in-person quality with online cost and flexibility by training with Linux Academy.

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

The Problem How to write an Xquery that works like a SQL outer join, providing placeholders for absent data on the outer side?  I give a bit more background at the end. The situation expressed as relational data Let’s work through this.  I’ve …
Many times as a report developer I've been asked to display normalized data such as three rows with values Jack, Joe, and Bob as a single comma-separated string such as 'Jack, Joe, Bob', and vice versa.  Here's how to do it. 
This video shows how to use Hyena, from SystemTools Software, to bulk import 100 user accounts from an external text file. View in 1080p for best video quality.
A short tutorial showing how to set up an email signature in Outlook on the Web (previously known as OWA). For free email signatures designs, visit https://www.mail-signatures.com/articles/signature-templates/?sts=6651 If you want to manage em…

861 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question