• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 467
  • Last Modified:

XML Download and Bulk Insert into MS SQL Database

I have a very serious problem.  Our programmer left the company and I've been tasked to find a solution to the following problem, although I have no programming experience and very little SQL experience.  He created a program that downloads XML data from a website, then bulk inserts the data into a table in an MS SQL database.  The problem is it ignores data that is previously in the table, but I need to be able to update that data if need be.  In other words, I want to update existing records if the data has changed between inserts.

Here's the program:

<!-- ---------------------------------------------------------------
-
- Title   : XXXXTrans.wsf
- Author  : XXXX
- Created : 30 Jun 2004
- Purpose : Query daily XXXX transactions and import into
-           XXXX database.
- Params  : HttpTimeout    : Timeout in seconds, -1 = Infinite.
-                            Default is 10 minutes (600 seconds).
-           TransStartDate : Transaction start date relative to
-                            today. Default is -1 (yesterday).
-           TransEndDate   : Transaction end date relative to today
-                            Default is 0 (today).
- Returns : N/A
- Notes   :
- History :
-
---------------------------------------------------------------- -->

<job id="XXXX XML Import">
<script language="JScript" src="lib\AddrList.js"/>
<script language="JScript" src="lib\DateUtils.js"/>
<script language="JScript" src="lib\FileUtils.js"/>
<script language="JScript" src="lib\MailUtils.js"/>
<script language="VBScript" src="QueryEfsNet.vbs"/>
<script language="JScript">


var ErrorLogFile = "TransErr.log";
var Database     = 'XXXX';
var SqlServer    = 'XXXX';

var XmlTempFile  = "XXXX.xml";


var g_SessionLog = '';
function Trace(Text)
{
  WScript.Echo(Text);
  g_SessionLog = g_SessionLog + "\r\n" + Text;
}

var Args = WScript.Arguments;
if(Args.length > 3)
{
  var WshShell = new ActiveXObject('WScript.Shell');
  WshShell.Popup('Invalid script arguments.\r\n\r\nSyntax: [HttpTimeout [TransStartDate [TransEndDate]]]', 30, 'Syntax Error');
  WScript.Quit(255);
}

function lpad(v)
{
  return (v < 10 ? '0' : '') + v;
}

function RelativeDate(Diff)
{
  var Now   = new Date();
  var Today = new Date(Now.getYear(), Now.getMonth(), Now.getDate());
  var Rel   = new Date(Today.valueOf() + (MILLISECONDSPERDAY * Diff));  
  var m     = Rel.getMonth() + 1;
  var d     = Rel.getDate();
  var y     = Rel.getYear();
  return y.toString().substr(2, 2) + lpad(m) + lpad(d);
}

var Timeout        = 600;
var TransStartDate = RelativeDate(-14);
var TransEndDate   = RelativeDate(0);

switch(Args.length)
{
  case 3:
    TransEndDate   = RelativeDate(parseInt(Args(2)));
  case 2:
    TransStartDate = RelativeDate(parseInt(Args(1)));
  case 1:
    Timeout        = parseInt(Args(0));
  default:
    break;
}

Trace('Timeout       = ' + Timeout);
Trace('TxStartDate   = ' + TransStartDate);
Trace('TxEndDate     = ' + TransEndDate);

var Success = true;  
var ErrorLogExists = false;

var Provider   = 'Provider=SQLOLEDB.1;Integrated Security=SSPI;Persist Security Info=False;'
var Catalog    = 'Initial Catalog=' + Database + ';'
var DataSource = 'Data Source=' + SqlServer + ';';

try {
  var XmlInputFile = fso().GetAbsolutePathName(XmlTempFile);
  Trace('XML temp file = ' + XmlInputFile);

  var ArchivePath = fso().GetParentFolderName(XmlInputFile) + '\\Archive\\';
  Trace('Archive path  = ' + ArchivePath);

  Trace('Querying XXXX for transaction data...');
  var URL = XXXXQueryTransactions(TransStartDate, TransEndDate);
  Trace('Query result URL = ' + URL);

  if(URL == '')
    throw new Error(0, 'Empty URL returned from EfsNet');

  var Http = new ActiveXObject("HttpUtil.Http");
  var ResCode = Http.Get(URL, XmlInputFile, Timeout);
  Trace('HTTP Result = ' + ResCode);

  if(ResCode != '200')
    throw new Error(0, 'HTTP GET failed');

  if(!fso().FileExists(XmlInputFile))
    throw new Error(0, 'XML data file not found: ' + XmlInputFile);

  if(fso().FileExists(ErrorLogFile))
    fso().DeleteFile(ErrorLogFile);

  Trace('Archiving XML data file...');
  var TS = TimeStamp('') + '_';
  fso().CopyFile(XmlInputFile, ArchivePath + TS + 'XXXX.xml', false);

  Trace('Creating XML Loader Com Object...');
  var Bulk = new ActiveXObject("SQLXMLBulkLoad.SQLXMLBulkLoad");

  Trace('Configuring loader options...');
  Bulk.ConnectionString = Provider + Catalog + DataSource;
  Bulk.IgnoreDuplicateKeys = true;
  //Bulk.Transaction = true;
  Bulk.ErrorLogFile = ErrorLogFile;

  Trace('Executing XML Bulk Load...');
  Bulk.Execute("XXXXTransMap.xml", XmlInputFile);
}
catch(e) {
  Trace(e.description);
  Success = false;  
}

try {
  ErrorLogExists = fso().FileExists(ErrorLogFile);
}
catch(e) {
  Trace(e.description);
  Success = false;  
}

var SendTo     = e_XXXX;
var SendCC     = '';
var SendFrom   = e_XXXX;
var Subject    = 'XXXX XML Import';
var MsgBody    = (!Success || ErrorLogExists) ? 'Error(s) occurred during import.\r\n' : 'Import succeeded.\r\n';
var Attachment = ErrorLogExists ? ErrorLogFile : '';

SendMail(SendTo, SendCC, Subject, MsgBody + g_SessionLog, Attachment, SendFrom);

</script>
</job>
//eof

Thanks for the help,

David
0
uxphreak
Asked:
uxphreak
  • 2
  • 2
2 Solutions
 
chippleCommented:
Is emptying the table everytime an option? Or do you need to keep some older data?
0
 
uxphreakAuthor Commented:
I need to keep all the data in case it needs to be referenced again.

David
0
 
chippleCommented:
Then the only option I see is to bulk load data in a temporary empty table, and then process it manually.

However with the information that you give it's not possible to help much further than saying this, and things may get a bit complex.
I guess your database structure is in XXXXTransMap.xml, so if you could show what's in there it could be helpful.
0
 
rdcproCommented:
Using the SQL XML Bulk load component, the IgnoreDuplicateKeys option will leave the rows already in the table untouched.  If you set this to false, it will fail on the insert when it gets to a row that's already there.

I'm not sure that the bulk loader component can do what you want; it uses the bulk load interface of SQL Server's SQLOLEDB native OLE DB provider, and I don't think you can update existing rows.  Read SQL Books Online about the BULK INSERT operation, and it's limitations.  This is really what an UpdateGram is for, but it's not efficient for large volumes of data.  

How big is this insert?

If it's large, Chipple may be right--you might have to load to a temp table, and write a trigger that calls a sproc to process the newly updated records. Maybe you can do this as a DTS package.

Regards,
Mike Sharp
0
 
rdcproCommented:
Also, it is possible that there is existing data in the table that is not in the feed at all?  

Regards,
Mike Sharp
0

Featured Post

Free Tool: SSL Checker

Scans your site and returns information about your SSL implementation and certificate. Helpful for debugging and validating your SSL configuration.

One of a set of tools we are providing to everyone as a way of saying thank you for being a part of the community.

  • 2
  • 2
Tackle projects and never again get stuck behind a technical roadblock.
Join Now