Link to home
Start Free TrialLog in
Avatar of volodya16
volodya16

asked on

IE History Extractor

Hello, I am trying to make a program that will go through the entire contents of the IE history folder (in the directory of a user profile) and extract the URLs to a text file.  Does anyone have any idea how to do this?  Thanks
Avatar of mark2150
mark2150

Yeah, did it a while ago in QBASIC. Lemme look around for the code...

M
FOUND IT!

' =====================================
'
' Program to decode NETSCAPE.HST files
'
' By:   Mark M. Lambert on October 11, 1996
' This program produced with public funds and is therefore
' property of:
' United States Department of Defense
' US Army, STRICOM - UNCLASSIFIED
'
' =====================================
'
' Revision History:
' V1.01 - 14 Oct 96 - MML - Make it take file from command line
' V1.0  - 11 Oct 96 - MML - Initial Code
'
' =====================================
'
' This program will examine a binary NETSCAPE.HST file and do
' its best to convert it into ASCII. The goal is to be able to tell
' what web sites a user has been visiting.
'
' Examination of a NETSCAPE.HST file shows it as a binary file with
' lots of HTTP:// references in it in ASCII. What we'll do is shuffle
' thru the file dropping out non-ASCII and watching for HTTP in the
' data stream. Every time we hit one we start a new line in the output
' file. Don't know what exactly that will give us, but we've got to
' start somewhere.
'
' =====================================
'
' These subs were generated with the SUBSET.BAS program and are in
' the MARKSLIB.LIB sublibrary
'
' Prototypes for the screen helper routines. None of them is called in
' the part of the program visible here; per the header they live in
' MARKSLIB.LIB.
DECLARE SUB cursor (row!, col!, text$)
DECLARE SUB center (row!, text$)
DECLARE SUB box (text$)
DECLARE SUB big.box (hi!)
DECLARE SUB menu (hi!, text$)
'
DECLARE FUNCTION getkey$ ()
'
' Variables shared through blank COMMON.
' NOTE(review): presumably the order must match the COMMON list used by
' the library routines - confirm before reordering anything here.
COMMON urhc$, ulhc$, lrhc$, llhc$, hb$, vb$, tt$, bt$, rt$, lt$, cx$
COMMON xwide, top$, side$, horz$, bot$, cr$, lf$, ff$, esc$, bell$
'
' Frame-drawing characters (presumably the code page 437 double-line
' box set: corners, horizontal/vertical bars, tees and cross - verify
' against the display in use)
urhc$   = CHR$(187)
ulhc$   = CHR$(201)
lrhc$   = CHR$(188)
llhc$   = CHR$(200)
hb$     = CHR$(205)
vb$     = CHR$(186)
tt$     = CHR$(203)
bt$     = CHR$(202)
rt$     = CHR$(185)
lt$     = CHR$(204)
cx$     = CHR$(206)
' Interior width of a frame; each prebuilt row below is xwide + 2
' characters long
xwide   = 77
top$    = ulhc$ + STRING$(xwide, hb$) + urhc$
side$   = vb$ + STRING$(xwide, " ") + vb$
horz$   = lt$ + STRING$(xwide, hb$) + rt$
bot$    = llhc$ + STRING$(xwide, hb$) + lrhc$
' ASCII control characters: CR, LF, form feed, escape, bell.
' Only cr$ and lf$ are used by the code visible below.
cr$     = CHR$(13)
lf$     = CHR$(10)
ff$     = CHR$(12)
esc$    = CHR$(27)
bell$   = CHR$(7)
'
' Boolean helpers: False is 0, True is NOT 0 (i.e. -1)
CONST   Zero = 0
CONST   False   = zero
CONST   True    = not false
'
' Set the printer line width to 255
width lprint 255
' Use colors 7 on 1 unless the CRT environment variable contains an "M"
' (presumably a site convention flagging a monochrome display - confirm)
if instr( ucase$( environ$( "CRT" ) ), "M") = zero then color 7, 1
' From here on any runtime error goes to the TRAP handler
on error goto trap
'
main:
'
' Get file name from command line and make sure it makes sense.
' Only a bare base name (no drive, path, wildcard or extension) of one
' to eight characters is accepted; the program then reads <name>.HST
' and writes <name>.LOG in the current directory.
'
filename$ = ucase$( ltrim$( rtrim$( command$ ) ) )
'
' Strip any extension (everything from the first dot onward)
'
dotmark = instr( filename$, "." )
if dotmark > zero then filename$ = left$( filename$, dotmark -1 )
'
' Reject fancy: wildcards, drive letters and path separators all make
' the program exit without doing anything
'
if instr( filename$, "*") then goto drain
if instr( filename$, ":") then goto drain
if instr( filename$, "?") then goto drain
if instr( filename$, "\") then goto drain
if instr( filename$, "/") then goto drain
'
' Well, we've rejected all forms of trash, check for nothing passed in
' (or a name too long for 8.3) and fall back to the default name
'
if ( len( filename$ ) < 1 ) or _
   ( len( filename$ ) > 8 ) then filename$ = "NETSCAPE"
'
' Any failure while opening the files simply exits
'
on error goto drain
'
' OPEN ... FOR BINARY silently creates a file that does not exist, so
' probe the input with FOR INPUT first: a missing .HST raises an error
' here and we exit instead of leaving an empty .HST behind.
'
open filename$ + ".HST" for input as #1
close #1
'
' OPEN ... FOR BINARY never truncates either. Create/empty the log with
' FOR OUTPUT first so a longer log from an earlier run cannot leave
' stale bytes after the new output.
'
open filename$ + ".LOG" for output as #2
close #2
'
open filename$ + ".HST" for binary as #1
open filename$ + ".LOG" for binary as #2
on error goto trap
'
' Ok, we've got the file, let's start looking for markers.
' We really need a sliding window for the markers
'
' Nothing to scan if the input is already at end of file
'
if eof(1) then goto done
'
' This is the core part of STRIPPER.BAS converted to run a byte at a
' time. A sliding window holds the most recent len( marker$ ) printable
' characters. Each new printable character pushes the oldest one out of
' the window and into the output file. Whenever the window reads
' HTTP:// (in any mix of case) a CR/LF is written, so every URL starts
' on a line of its own. Non-printable bytes are dropped and do not
' disturb the window.
'
marker$ = "HTTP://"
window$ = ""
'
main.loop:
        s.in$   = " "
        if eof(1) then goto done
'
' s.in$ is one character long, so GET pulls exactly one byte
'
        get #1,, s.in$
'
        if (s.in$ = "") then goto done
'
        byte%   = asc(s.in$)
'
' Only allow printable ASCII thru
'
        if (((byte% >= 32) and (byte% < 127))) then
                window$ = window$ + chr$( byte% )
'
' Window is over-full: shift the oldest character out
'
                if len( window$ ) > len( marker$ ) then
                        lastchar$ = left$( window$, 1 )
                        window$ = right$( window$, len( window$ ) -1 )
'
' Always write the character that just left the window, even when a
' marker follows it; it is the last character of the previous record
' and used to be thrown away
'
                        put #2,, lastchar$
'
' Compare against an upper-cased copy only. The window itself keeps the
' original case because URL paths are case sensitive and the window
' contents are what eventually reach the log
'
                        if ucase$( window$ ) = marker$ then
                                put #2,, cr$
                                put #2,, lf$
                        endif
                endif
        endif
'
'        if (byte%  = 13) then
'                put #2,, cr$
'                put #2,, lf$
'        endif
'
        goto main.loop
'=================

done:
'
' End of input. The sliding window still holds the last few printable
' characters that have not been written yet; flush them so the tail of
' the final record is not lost, then terminate the line and close both
' files.
'
if len( window$ ) > zero then put #2,, window$
put #2,, cr$
put #2,, lf$
close
'
' Common exit point; also the target for rejected file names and for
' errors while opening the files
'
drain:
SYSTEM
' =====================================
'
' Subroutines:
'
' =====================================
'
' Routine:      
' Pass:        
' Returns:
'
RETURN
' =====================================
'
' Routine:      TRAP
' Here on any unexpected error
'
trap:
'
' Report the runtime error number on the console, then leave through
' the common exit point
'
        print "Internal Error #"; err; " has occurred!"
        resume drain
' =====================================
' =====================================
'
' Fin
'
' =====================================
' =====================================
This might help you a little bit.

"Retrieving the Internet Cache with FindFirst/NextUrlCacheEntry"
http://www.mvps.org/vbnet/code/fileapi/findfirstcacheentry.htm
ASKER CERTIFIED SOLUTION
Avatar of mcrider
mcrider

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Avatar of volodya16

ASKER

Looks like exactly what I need - the only thing is when I run it I get "runtime error '9': subscript out of range" at this part:

sURLs(iURLCount) = Mid$(sData, i + iDisplacement, j - (i + iDisplacement))

Since I didn't write the code I would strain myself trying to figure out how the array is set up - I'm still a beginner :).  But if you have any ideas I am definitely listening!!!  Thanx