AlHal2
asked on
convert xml file to csv
I have some Excel files with elements and attributes all of different values.
I would like to have Elements and attributes as column headings. Where attributes are used as headings the element name should proceed the attribute name with an _ separating them.
The value of the element should have the element name followed by the word "value in the header as in AdminStatus_Value in the example below.
For example
<OrganizationId>4295856130 </Organiza tionId><Ad minStatus effectiveFrom="2009-07-21T 17:02:06"> Published< /AdminStat us><Organi zationName effectiveFrom="2005-08-18T 04:00:00" organizationNameTypeCode=" AKA" languageId="505062" organizationNameLocalNorma lized="TX" sourceType="11" sourceLink="http://www.nyse.com/about/listed/tx.html" sourceUpdateDate="2012-04- 03T07:22:3 6">TX</Org anizationN ame>
should have as headings:
OrganizationID,
AdminStatus_EffectiveFrom,
AdminStatusValue
OrganizationName_Effective From,
OrganizationName_organizat ionNameTyp eCode,
OrganizationName_LanguageI D,
OrganizationName_organizat ionNameLoc alNormaliz ed,
OrganizationName_SourceTyp e
OrganizationName_SourceLin k
OrganizationName_SourceUpd ateDate
I would like to have Elements and attributes as column headings. Where attributes are used as headings the element name should proceed the attribute name with an _ separating them.
The value of the element should have the element name followed by the word "value in the header as in AdminStatus_Value in the example below.
For example
<OrganizationId>4295856130
should have as headings:
OrganizationID,
AdminStatus_EffectiveFrom,
AdminStatusValue
OrganizationName_Effective
OrganizationName_organizat
OrganizationName_LanguageI
OrganizationName_organizat
OrganizationName_SourceTyp
OrganizationName_SourceLin
OrganizationName_SourceUpd
ASKER
Here is the first part of the file.
<?xml version='1.0' encoding='utf-8'?><env:Con tentEnvelo pe majVers='3' minVers='2.3' pubStyle='Incremental' xmlns:env='http://data.schemas.tfn.thomson.com/Envelope/2008-05-01/' xmlns:cmn='http://data.schemas.financial.thomsonreuters.com/Common/2009-09-01/' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns='http://oa.schemas.tfn.thomson.com/Organization/2010-05-01/' xsi:schemaLocation='http://oa.schemas.tfn.thomson.com/Organization/2010-05-01/ OrganizationMaster.xsd'><e nv:Header> <env:Info> <env:Id>ur n:uuid:843 a1d51-445a -403e-b062 -88430a6af f26</env:I d><env:Tim eStamp>201 5-01-19T13 :54:23</en v:TimeStam p></env:In fo></env:H eader><env :Body majVers='3' minVers='3.0' contentSet='OA'>
<env:ContentItem action="Overwrite"><env:Da ta xsi:type="OrganizationData Item"><Org anization entityCreatedDate="2009-07 -21T17:02: 06" entityModifiedDate="2015-0 1-19T05:31 :48" isOrganizationVerified="tr ue" isOrganizationManaged="tru e" isOrganizationFinancialLoc alBranch=" false" isOrganizationFinancialFor eignBranch ="false" isOrganizationOtherBrancha ndDivision ="false" entityLastReviewedDate="20 14-07-17T0 4:00:00" organizationTier="1"><Orga nizationId >429585613 0</Organiz ationId><A dminStatus effectiveFrom="2009-07-21T 17:02:06"> Published< /AdminStat us><Organi zationName effectiveFrom="2005-08-18T 04:00:00" organizationNameTypeCode=" AKA" languageId="505062" organizationNameLocalNorma lized="TX"
<?xml version='1.0' encoding='utf-8'?><env:Con
<env:ContentItem action="Overwrite"><env:Da
ASKER
This is the code I'm using. I get this error message
Object reference not set to an instance of an object.
Object reference not set to an instance of an object.
using (StreamWriter sw = new StreamWriter("C:\\OAOrganization\\OrganizationNameParsed2.txt"))
{
XElement custOrd = XElement.Load("C:\\OAOrganization\\OrganizationName.txt");
string csv =
(from el in custOrd.Element("Body").Elements("ContentItem").Elements("Data").Elements("Organization").Elements("OrganizationName")
select
String.Format("{0},{1},{2},{3},{4}",
(string)el.Attribute("effectiveFrom"),
(string)el.Element("effectiveTo"),
(string)el.Element("organizationNameTypeCode"),
(string)el.Element("languageId"),
//(string)el.Element("organizationNameLocalNormalized"),
//(string)el.Element("FullAddress").Element("Address"),
//(string)el.Element("FullAddress").Element("City"),
//(string)el.Element("FullAddress").Element("Region"),
//(string)el.Element("FullAddress").Element("PostalCode"),
//(string)el.Element("FullAddress").Element("Country"),
Environment.NewLine
)
)
.Aggregate(
new StringBuilder(),
(sb, s) => sb.Append(s),
sb => sb.ToString()
);
sw.WriteLine(csv);
}
Could you post a valid xml file (basically that contains a complete node or nodes of what you need)?
ASKER
How is this?
C--OAOrganization-File.txt
C--OAOrganization-File.txt
ASKER
If the above is impossible then I have this code to put each element and attribute on a separate row. The only thing is it gives the OrganizationID as published which is the value for AdminStatus.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Xml;
using System.IO;
using System.Xml.Linq;
namespace ParseXML
{
public partial class Form1 : Form
{
string FileText = "";
int CharCt;
string NodeName="";
string OrgID;
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
{
string xmlNode = "";
using (StreamReader sr = File.OpenText("C:\\OAOrganization\\File.txt"))
{
using (StreamWriter sw = new StreamWriter("C:\\OAOrganization\\OrganizationNameParsed.txt"))
{
try
{
while (sr.Peek() >= 0)
{
xmlNode = sr.ReadToEnd();
}
}
catch (Exception ex)
{
lblErrs.Text = ex.Message;
}
XmlReader xReader = XmlReader.Create(new StringReader(xmlNode));
while (xReader.Read())
{
switch (xReader.NodeType)
{
case XmlNodeType.Element:
NodeName = "";
//OrgID = "";
if (xReader.Name.ToUpper() != "ENV:HEADER"
&& xReader.Name.ToUpper() != "ENV:CONTENTENVELOPE"
&& xReader.Name.ToUpper() != "ENV:ID"
&& xReader.Name.ToUpper() != "ENV:INFO"
&& xReader.Name.ToUpper() != "ENV:TIMESTAMP"
&& xReader.Name.ToUpper() != "ENV:BODY"
&& xReader.Name.ToUpper() != "ENV:CONTENTITEM"
&& xReader.Name.ToUpper() != "ENV:DATA"
&& xReader.Name.ToUpper() != "ORGANIZATION"
&& xReader.Name.ToUpper().Contains("ADMIN") == false
&& xReader.Name.ToUpper().Contains("ISPUBLIC") == false
&& xReader.Name.ToUpper().Contains("IPODATE") == false
&& xReader.Name.ToUpper().Contains("ADDRESS") == false
&& xReader.Name.ToUpper().Contains("PHONE") == false
&& xReader.Name.ToUpper().Contains("FOUNDED") == false
&& xReader.Name.ToUpper().Contains("JURISDICTION") == false
&& xReader.Name.ToUpper().Contains("SUBTYPE") == false
&& xReader.Name.ToUpper().Contains("PROVIDER") == false)
{
//sw.Write("<" + xReader.Name + ">|");
//sw.Write("<" + xReader.Name + ">|" + xReader.ReadElementContentAsString() + "|</"+ xReader.Name + ">\r\n");
//sw.Write(xReader.Name + "|" + xReader.Value + "|" + xReader.Name + "\r\n");
NodeName = xReader.Name ;
if (xReader.Name == "OrganizationId")
{
OrgID = xReader.ReadElementContentAsString();
}
for (int attInd = 0; attInd < xReader.AttributeCount; attInd++)
{
if (xReader.Name.ToUpper().Contains("ADDRESS") == false
&& xReader.Name.ToUpper().Contains("EFFECTIVE") == false
&& xReader.Name.ToUpper().Contains("ADMIN") == false
&& xReader.Name.ToUpper().Contains("SOURCE") == false
&& xReader.Name.ToUpper().Contains("LANGUAGE") == false
&& xReader.Name.ToUpper().Contains("ISPUBLIC") == false
&& xReader.Name.ToUpper().Contains("IPODATE") == false)
{
if (xReader.Value != "")
{
sw.Write(OrgID + "|" + NodeName + "_" + xReader.Name + "|" + xReader.Value + "|" + NodeName + "_" + xReader.Name + "\r\n");
}
}
xReader.MoveToAttribute(attInd);
//NodeName = xReader.Name;
}
}
break;
case XmlNodeType.Text:
//NodeName = xReader.Name;
if (NodeName != "")
{
//xReader.
sw.Write(OrgID + "|" + NodeName + "|" + xReader.Value + "|" + NodeName);
sw.Write("\r\n");
}
break;
case XmlNodeType.EndElement:
break;
}
}
}
}
using (StreamReader sr = File.OpenText("C:\\OAOrganization\\OrganizationNameParsed.txt"))
{
using (StreamWriter sw = new StreamWriter("C:\\OAOrganization\\OrganizationNameParsed2.txt"))
{
try
{
while (sr.Peek() >= 0)
{
FileText = sr.ReadLine();
CharCt = FileText.IndexOf("<Organ");
if (CharCt > 0)
{
FileText = FileText.Substring(CharCt, FileText.Length - CharCt);
sw.Write(FileText);
sw.Write("\r\n");
}
}
}
catch (Exception ex)
{
lblErrs.Text = ex.Message;
}
}
}
}
Environment.Exit(0);
}
}
}
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
That's great. Thanks.
I do not know the root element of your doco, but to access attributes use el.Attribute("effectiveFro
To save csv string use:
System.IO.File.WriteAllTex
For further help. we need an xml sample file to custom the link code but it should be straight forwards to code it.