Dimkov
asked on
fopen problem
Hi, I am migrating solution from vs2005 to vs2003...pls don't ask why :)
in there, there is function:
// Reads the text file named by `str` line by line and returns its contents
// concatenated into one wide string. Returns an empty string when the file
// cannot be opened.
// NOTE: the "ccs=UTF-8" mode flag is what makes the CRT decode UTF-8 here;
// as discussed in this thread, the VS2003 CRT does not support that flag.
wstring getXml(const wstring& str)
{
    FILE * pf = _wfopen(str.c_str(), L"rt, ccs=UTF-8");
    wstring ws;
    wchar_t wsz[8192]; // big enough for very long lines
    if (pf==NULL) return ws;
    // fgetws expects the buffer capacity in wide characters, not in bytes;
    // passing sizeof(wsz) would let it write past the end of wsz.
    while (fgetws(wsz, (int)(sizeof(wsz) / sizeof(wsz[0])), pf) != NULL)
    {
        ws += wsz;
    }
    fclose(pf);
    return ws;
}
in VS2003 UTF-8 is not supported with wfopen(as JKR pointed out) :(, so when reading any xml i get garbage for the unicode characters.
can anyone please help me to find replacement of this function? I will be greatly grateful
http://msdn2.microsoft.com/en-us/library/yeby3zcb(VS.80).aspx
http://msdn2.microsoft.com/en-us/library/yeby3zcb(vs.71).aspx
in there, there is function:
// Reads the text file named by `str` line by line and returns its contents
// concatenated into one wide string. Returns an empty string when the file
// cannot be opened.
// NOTE: the "ccs=UTF-8" mode flag is what makes the CRT decode UTF-8 here;
// as discussed in this thread, the VS2003 CRT does not support that flag.
wstring getXml(const wstring& str)
{
    FILE * pf = _wfopen(str.c_str(), L"rt, ccs=UTF-8");
    wstring ws;
    wchar_t wsz[8192]; // big enough for very long lines
    if (pf==NULL) return ws;
    // fgetws expects the buffer capacity in wide characters, not in bytes;
    // passing sizeof(wsz) would let it write past the end of wsz.
    while (fgetws(wsz, (int)(sizeof(wsz) / sizeof(wsz[0])), pf) != NULL)
    {
        ws += wsz;
    }
    fclose(pf);
    return ws;
}
in VS2003 UTF-8 is not supported with wfopen(as JKR pointed out) :(, so when reading any xml i get garbage for the unicode characters.
can anyone please help me to find replacement of this function? I will be greatly grateful
http://msdn2.microsoft.com/en-us/library/yeby3zcb(VS.80).aspx
http://msdn2.microsoft.com/en-us/library/yeby3zcb(vs.71).aspx
I am actually surprised that vs2005 supports UTF-8 on a wchar_t* string.
That really doesn't make sense. The main point of using UTF-8 is that you can use fewer bytes than a UNICODE 2/4 byte wstring, and still be able to reference local characters.
UTF-8 is normally used with char* type API functions, and not wchar_t* types.
That really doesn't make sense. The main point of using UTF-8 is that you can use fewer bytes than a UNICODE 2/4 byte wstring, and still be able to reference local characters.
UTF-8 is normally used with char* type API functions, and not wchar_t* types.
FILE * pf = _fopen((const char*)str.c_str(), "rt"); //If you're sure this is a true UTF-8 string, then cast it to (const char*)
wstring ws;
wchar_t wsz[8192]; // big enough for very long lines
if (pf==NULL) return ws;
while (fgetws(wsz, sizeof(wsz), pf) != NULL)
{
ws += wsz;
}
wstring ws;
wchar_t wsz[8192]; // big enough for very long lines
if (pf==NULL) return ws;
while (fgetws(wsz, sizeof(wsz), pf) != NULL)
{
ws += wsz;
}
ASKER
Axter, thanks for the input
FILE * pf = _fopen((const char*)str.c_str(), "rt"); //If you're sure this is a true UTF-8 string, then cast it to (const char*)
This is not needed since it just represents the name of the file to be opened. The problem is extracting the characters from the file.
FILE * pf = _fopen((const char*)str.c_str(), "rt"); //If you're sure this is a true UTF-8 string, then cast it to (const char*)
This is not needed since it just represents the name of the file to be opened. The problem is extracting the characters from the file.
>>This is not needed since it just represents the name of the file to be opened. The problem is extracting the
>>characters from the file.
Then extract the characters using char* API, and not wchar_t type.
char sz[8192] = {0}; // big enough for very long lines
if (pf==NULL) return ws;
while (fgets(sz, sizeof(wsz), pf) != NULL)
{
//Convert sz to a wide character string using mbtowc API
ws += wsz;
}
>>characters from the file.
Then extract the characters using char* API, and not wchar_t type.
char sz[8192] = {0}; // big enough for very long lines
if (pf==NULL) return ws;
while (fgets(sz, sizeof(wsz), pf) != NULL)
{
//Convert sz to a wide character string using mbtowc API
ws += wsz;
}
ASKER
does mbtowcs support UTF-8?
char sz[8192] = {0}; // big enough for very long lines
if (pf==NULL) return ws;
while (fgets(sz, sizeof(sz), pf) != NULL)
{
//Convert sz to a wide character string using mbtowc API
wchar_t wsz[sizeof(sz)] = {0};
mbtowc(wsz,sz,strlen(sz));
ws += wsz;
}
if (pf==NULL) return ws;
while (fgets(sz, sizeof(sz), pf) != NULL)
{
//Convert sz to a wide character string using mbtowc API
wchar_t wsz[sizeof(sz)] = {0};
mbtowc(wsz,sz,strlen(sz));
ws += wsz;
}
>>does mbtowc support UTF-8?
Yes. mb stands for multi-byte.
Yes. mb stands for multi-byte.
FYI:
There's also an opposite function, called wctomb, which converts it from UNICODE to UTF-8.
There's also an opposite function, called wctomb, which converts it from UNICODE to UTF-8.
ASKER
nope, it doesn't work.
but i found
wchar_t wsz[sizeof(sz)] = {0};
MultiByteToWideChar(CP_UTF8,0,sz,strlen(sz+1), wsz,sizeof(wsz));
ws += wsz;
which works fine.
Pls just give me a line how to remove the BOM flag, since it is there in the file, and destroys the XML structure
but i found
wchar_t wsz[sizeof(sz)] = {0};
MultiByteToWideChar(CP_UTF8,0,sz,strlen(sz+1), wsz,sizeof(wsz));
ws += wsz;
which works fine.
Pls just give me a line how to remove the BOM flag, since it is there in the file, and destroys the XML structure
ASKER
nooo even MultiByteToWideChar is not working :(
it does not give the value correct
now, i am stuck :(
it does not give the value correct
now, i am stuck :(
>>nooo even MultiByteToWideChar is not working :(
Can you give us a small UTF-8 string that is in your file, and that is not being read correctly?
Can you give us a small UTF-8 string that is in your file, and that is not being read correctly?
ASKER
it is not a problem, but the text area here does not support it: I can send it to you by mail or put it on rapidshare
Can you point to the specific string in this file that is causing the problem?
I might have to finish trying this out later tonight, because I have to go off line in a few minutes.
I might have to finish trying this out later tonight, because I have to go off line in a few minutes.
ASKER
in
<mes:Name>Ministrstvo za javno upravo</mes:Name>
<mes:Street>Tr~aaka cesta 21</mes:Street>
<mes:PostalCode>1000</mes:PostalCode>
in Tr~aaka there are 2 of these characters
I will be here all night.. I have to finish the project by tomorrow :)
<mes:Name>Ministrstvo za javno upravo</mes:Name>
<mes:Street>Tr~aaka cesta 21</mes:Street>
<mes:PostalCode>1000</mes:PostalCode>
in Tr~aaka there are 2 of these characters
I will be here all night.. I have to finish the project by tomorrow :)
ASKER
Axter, by using:
// Reads the whole file named by `str` as UTF-8 and returns it converted to a
// wide (UTF-16) string. A leading UTF-8 byte-order mark (EF BB BF) is skipped
// so it does not end up in front of the XML declaration.
// Returns an empty string if the file cannot be opened, is empty, memory
// cannot be allocated, or the conversion fails.
wstring getXml (wstring str)
{
    wstring resultingString;

    FILE * pFile = _wfopen ( str.c_str() , L"rb" );
    if (!pFile) return resultingString;

    // obtain file size:
    fseek (pFile , 0 , SEEK_END);
    long lSize = ftell (pFile);
    rewind (pFile);
    if (lSize <= 0)
    {
        fclose (pFile);
        return resultingString;
    }

    // allocate memory to contain the whole file:
    char * buffer1 = (char*) malloc ((size_t)lSize);
    if (!buffer1)
    {
        fclose (pFile);
        return resultingString;
    }

    // copy the file into the buffer; `result` is the number of bytes that
    // were really read, which is what the conversion below must use.
    size_t result = fread (buffer1, 1, (size_t)lSize, pFile);
    fclose (pFile);

    // Skip the BOM only when ALL three bytes match. The bytes are compared as
    // unsigned values so the test does not depend on the signedness of char.
    const char * text = buffer1;
    size_t textLen = result;
    if (textLen >= 3 &&
        (unsigned char)text[0] == 0xEF &&
        (unsigned char)text[1] == 0xBB &&
        (unsigned char)text[2] == 0xBF)
    {
        text += 3;
        textLen -= 3;
    }

    if (textLen > 0)
    {
        // First call asks for the required number of wide characters, the
        // second converts straight into the string's storage. An explicit
        // byte count is passed, so the input needs no terminator.
        // textLen fits in an int here because it is bounded by the long lSize.
        int cch = MultiByteToWideChar(CP_UTF8, 0, text, (int)textLen, NULL, 0);
        if (cch > 0)
        {
            resultingString.resize(cch);
            if (!MultiByteToWideChar(CP_UTF8, 0, text, (int)textLen, &resultingString[0], cch))
            {
                resultingString.erase();
            }
        }
    }

    free (buffer1);
    return resultingString;
}
I managed to get a valid string in buffer1. But when i send it to MultiByteToWideChar the result I get is wrong...
i don't think this can be solved :(
// Reads the whole file named by `str` as UTF-8 and returns it converted to a
// wide (UTF-16) string. A leading UTF-8 byte-order mark (EF BB BF) is skipped
// so it does not end up in front of the XML declaration.
// Returns an empty string if the file cannot be opened, is empty, memory
// cannot be allocated, or the conversion fails.
wstring getXml (wstring str)
{
    wstring resultingString;

    FILE * pFile = _wfopen ( str.c_str() , L"rb" );
    if (!pFile) return resultingString;

    // obtain file size:
    fseek (pFile , 0 , SEEK_END);
    long lSize = ftell (pFile);
    rewind (pFile);
    if (lSize <= 0)
    {
        fclose (pFile);
        return resultingString;
    }

    // allocate memory to contain the whole file:
    char * buffer1 = (char*) malloc ((size_t)lSize);
    if (!buffer1)
    {
        fclose (pFile);
        return resultingString;
    }

    // copy the file into the buffer; `result` is the number of bytes that
    // were really read, which is what the conversion below must use.
    size_t result = fread (buffer1, 1, (size_t)lSize, pFile);
    fclose (pFile);

    // Skip the BOM only when ALL three bytes match. The bytes are compared as
    // unsigned values so the test does not depend on the signedness of char.
    const char * text = buffer1;
    size_t textLen = result;
    if (textLen >= 3 &&
        (unsigned char)text[0] == 0xEF &&
        (unsigned char)text[1] == 0xBB &&
        (unsigned char)text[2] == 0xBF)
    {
        text += 3;
        textLen -= 3;
    }

    if (textLen > 0)
    {
        // First call asks for the required number of wide characters, the
        // second converts straight into the string's storage. An explicit
        // byte count is passed, so the input needs no terminator.
        // textLen fits in an int here because it is bounded by the long lSize.
        int cch = MultiByteToWideChar(CP_UTF8, 0, text, (int)textLen, NULL, 0);
        if (cch > 0)
        {
            resultingString.resize(cch);
            if (!MultiByteToWideChar(CP_UTF8, 0, text, (int)textLen, &resultingString[0], cch))
            {
                resultingString.erase();
            }
        }
    }

    free (buffer1);
    return resultingString;
}
I managed to get a valid string in buffer1. But when i send it to MultiByteToWideChar the result I get is wrong...
i don't think this can be solved :(
http://rapidshare.com/files/60532191/eVrocanje-bianco.xml.html
Is the above link supposed to have the string you posted?
<mes:Street>Tr~aaka cesta 21</mes:Street>
I don't see anything that looks like the above string in the link.
Is the above link supposed to have the string you posted?
<mes:Street>Tr~aaka cesta 21</mes:Street>
I don't see anything that looks like the above string in the link.
ASKER
it is there, line 19
<mes:PhysicalAddress>
<mes:Name>Ministrstvo za javno upravo</mes:Name>
<mes:Street>Tr~aaka cesta 21</mes:Street>
<mes:PostalCode>1000</mes:PostalCode>
<mes:City>Ljubljana</mes:City>
</mes:PhysicalAddress>
It is in the mes:Street node. Unfortunately, when posted, https://www.experts-exchange.com also distorts the text
<mes:PhysicalAddress>
<mes:Name>Ministrstvo za javno upravo</mes:Name>
<mes:Street>Tr~aaka cesta 21</mes:Street>
<mes:PostalCode>1000</mes:PostalCode>
<mes:City>Ljubljana</mes:City>
</mes:PhysicalAddress>
It is in the mes:Street node. Unfortunately, when posted, https://www.experts-exchange.com also distorts the text
ASKER
Axter, I solved the problem by adding another function:
// Converts a UTF-8 byte string into a newly allocated UTF-16 BSTR.
//   pSrc  - UTF-8 input; when NULL the function returns NULL.
//   cbSrc - number of bytes to convert; a negative value means "use
//           strlen(pSrc)".
// Returns NULL when allocation or conversion fails. On success the caller
// owns the BSTR and must release it with SysFreeString.
BSTR UTF8toUTF16(const char * pSrc, int cbSrc = -1)
{
    if (pSrc == NULL)
        return NULL;
    if (cbSrc < 0)
        cbSrc = (int)strlen(pSrc);

    // MultiByteToWideChar rejects a zero-length source, so an empty input is
    // answered directly with an empty BSTR.
    if (cbSrc == 0)
        return SysAllocStringLen(NULL, 0);

    // Get output size. Exactly cbSrc bytes are converted: the byte after the
    // caller's data is never read, and the terminator is not counted in the
    // BSTR length (SysAllocStringLen appends the terminating null itself).
    int cwch = MultiByteToWideChar(CP_UTF8, 0, pSrc, cbSrc, NULL, 0);
    if (cwch <= 0)
        return NULL;

    BSTR ret = SysAllocStringLen(NULL, cwch);
    if (ret == NULL)
        return NULL;

    // Convert from UTF8 into WideString
    if (!MultiByteToWideChar(CP_UTF8, 0, pSrc, cbSrc, ret, cwch))
    {
        SysFreeString(ret);//must clean up
        return NULL;
    }
    return ret;
}
which converts char * to bstr. Afterwards the casting to wstring is no problem
I will ask this question to be closed
// Converts a UTF-8 byte string into a newly allocated UTF-16 BSTR.
//   pSrc  - UTF-8 input; when NULL the function returns NULL.
//   cbSrc - number of bytes to convert; a negative value means "use
//           strlen(pSrc)".
// Returns NULL when allocation or conversion fails. On success the caller
// owns the BSTR and must release it with SysFreeString.
BSTR UTF8toUTF16(const char * pSrc, int cbSrc = -1)
{
    if (pSrc == NULL)
        return NULL;
    if (cbSrc < 0)
        cbSrc = (int)strlen(pSrc);

    // MultiByteToWideChar rejects a zero-length source, so an empty input is
    // answered directly with an empty BSTR.
    if (cbSrc == 0)
        return SysAllocStringLen(NULL, 0);

    // Get output size. Exactly cbSrc bytes are converted: the byte after the
    // caller's data is never read, and the terminator is not counted in the
    // BSTR length (SysAllocStringLen appends the terminating null itself).
    int cwch = MultiByteToWideChar(CP_UTF8, 0, pSrc, cbSrc, NULL, 0);
    if (cwch <= 0)
        return NULL;

    BSTR ret = SysAllocStringLen(NULL, cwch);
    if (ret == NULL)
        return NULL;

    // Convert from UTF8 into WideString
    if (!MultiByteToWideChar(CP_UTF8, 0, pSrc, cbSrc, ret, cwch))
    {
        SysFreeString(ret);//must clean up
        return NULL;
    }
    return ret;
}
which converts char * to bstr. Afterwards the casting to wstring is no problem
I will ask this question to be closed
>>I will ask this question to be closed
Since this function is taking a char* type, I'm assuming you took my original advice, which is to read it into char* buffer.
And this function is also converting it from char* to wide string, which is also part of my original advice.
Considering your solution incorporates suggestions given, IMHO, an answer should be accepted.
Since this function is taking a char* type, I'm assuming you took my original advice, which is to read it into char* buffer.
And this function is also converting it from char* to wide string, which is also part of my original advice.
Considering your solution incorporates suggestions given, IMHO, an answer should be accepted.
I just ran a test, and your modified method does not yield different results, from the original method I proposed.
Here's example code:
// Converts a UTF-8 byte string into a newly allocated UTF-16 BSTR.
//   pSrc  - UTF-8 input; when NULL the function returns NULL.
//   cbSrc - number of bytes to convert; a negative value means "use
//           strlen(pSrc)".
// Returns NULL when allocation or conversion fails. On success the caller
// owns the BSTR and must release it with SysFreeString.
BSTR UTF8toUTF16(const char * pSrc, int cbSrc = -1)
{
    if (pSrc == NULL)
        return NULL;
    if (cbSrc < 0)
        cbSrc = (int)strlen(pSrc);

    // MultiByteToWideChar rejects a zero-length source, so an empty input is
    // answered directly with an empty BSTR.
    if (cbSrc == 0)
        return SysAllocStringLen(NULL, 0);

    // Get output size. Exactly cbSrc bytes are converted: the byte after the
    // caller's data is never read, and the terminator is not counted in the
    // BSTR length (SysAllocStringLen appends the terminating null itself).
    int cwch = MultiByteToWideChar(CP_UTF8, 0, pSrc, cbSrc, NULL, 0);
    if (cwch <= 0)
        return NULL;

    BSTR ret = SysAllocStringLen(NULL, cwch);
    if (ret == NULL)
        return NULL;

    // Convert from UTF8 into WideString
    if (!MultiByteToWideChar(CP_UTF8, 0, pSrc, cbSrc, ret, cwch))
    {
        SysFreeString(ret);//must clean up
        return NULL;
    }
    return ret;
}
// Test helper: reads the beginning of the file named by `str` (at most
// sizeof(sz) - 1 bytes) and converts it from UTF-8 to a BSTR via UTF8toUTF16.
// Returns NULL if the file cannot be opened, nothing could be read, or the
// conversion returns NULL. The caller releases the result with SysFreeString.
BSTR ModifiedProposedMethod(const wstring& str)
{
    FILE * pf = _wfopen(str.c_str(), L"rt");
    if (pf==NULL) return NULL;

    char sz[32000] = {0}; // big enough for very long lines
    // Read one byte less than the buffer holds so the zero-initialised last
    // byte always terminates the data, even when the file fills the buffer.
    size_t QtyRead = fread(sz, 1, sizeof(sz) - 1, pf);
    fclose(pf);

    if (QtyRead == 0) return NULL;
    return UTF8toUTF16(sz, (int)QtyRead);
}
// Test helper: reads the file named by `str` in fixed-size chunks, converts
// each chunk from UTF-8 to UTF-16 with MultiByteToWideChar and returns the
// concatenated result. Returns an empty string if the file cannot be opened.
wstring OriginalProposedMethod(const wstring& str)
{
    wstring ws;
    FILE * pf = _wfopen(str.c_str(), L"rt");
    if (pf==NULL) return ws;

    char sz[8192] = {0}; // big enough for very long lines
    wchar_t wsz[sizeof(sz)] = {0};
    const int wszCount = (int)(sizeof(wsz) / sizeof(wsz[0]));
    size_t carried = 0; // bytes of an unfinished UTF-8 sequence kept from the previous chunk
    size_t QtyRead = 0;
    while ( ( QtyRead = fread(sz + carried, 1, sizeof(sz) - carried, pf)) > 0)
    {
        const size_t avail = carried + QtyRead;

        // A chunk boundary can fall in the middle of a multi-byte sequence.
        // Count the trailing continuation bytes (10xxxxxx), then check whether
        // the lead byte in front of them announces more bytes than are present.
        size_t back = 0;
        while (back < 3 && back < avail &&
               ((unsigned char)sz[avail - 1 - back] & 0xC0) == 0x80)
        {
            ++back;
        }
        size_t complete = avail;
        if (back < avail)
        {
            const unsigned char lead = (unsigned char)sz[avail - 1 - back];
            const size_t need = (lead >= 0xF0) ? 4 : (lead >= 0xE0) ? 3 : (lead >= 0xC0) ? 2 : 1;
            if (need > back + 1)
                complete = avail - 1 - back; // hold the partial sequence back
        }

        if (complete > 0)
        {
            int t1 = MultiByteToWideChar(CP_UTF8, 0, sz, (int)complete, wsz, wszCount);
            // Append by count: wsz carries no terminator when the chunk fills it.
            if (t1 > 0) ws.append(wsz, t1);
        }

        // Move the held-back bytes to the front for the next read.
        carried = avail - complete;
        for (size_t i = 0; i < carried; ++i)
            sz[i] = sz[complete + i];
    }

    // A sequence still unfinished at end of file is converted as it stands.
    if (carried > 0)
    {
        int t1 = MultiByteToWideChar(CP_UTF8, 0, sz, (int)carried, wsz, wszCount);
        if (t1 > 0) ws.append(wsz, t1);
    }

    fclose(pf);
    return ws;
}
int main()
{
BSTR s1 = ModifiedProposedMethod(L"C :\\TMP\\Te X.txt");
wstring s2x = OriginalProposedMethod(L"C :\\TMP\\Te X.txt");
const wchar_t *s2 = s2x.c_str();
wcout << s1 << endl;
wcout << s2 << endl;
wcout << endl;
system("pause");
return 0;
}
Here's example code:
// Converts a UTF-8 byte string into a newly allocated UTF-16 BSTR.
//   pSrc  - UTF-8 input; when NULL the function returns NULL.
//   cbSrc - number of bytes to convert; a negative value means "use
//           strlen(pSrc)".
// Returns NULL when allocation or conversion fails. On success the caller
// owns the BSTR and must release it with SysFreeString.
BSTR UTF8toUTF16(const char * pSrc, int cbSrc = -1)
{
    if (pSrc == NULL)
        return NULL;
    if (cbSrc < 0)
        cbSrc = (int)strlen(pSrc);

    // MultiByteToWideChar rejects a zero-length source, so an empty input is
    // answered directly with an empty BSTR.
    if (cbSrc == 0)
        return SysAllocStringLen(NULL, 0);

    // Get output size. Exactly cbSrc bytes are converted: the byte after the
    // caller's data is never read, and the terminator is not counted in the
    // BSTR length (SysAllocStringLen appends the terminating null itself).
    int cwch = MultiByteToWideChar(CP_UTF8, 0, pSrc, cbSrc, NULL, 0);
    if (cwch <= 0)
        return NULL;

    BSTR ret = SysAllocStringLen(NULL, cwch);
    if (ret == NULL)
        return NULL;

    // Convert from UTF8 into WideString
    if (!MultiByteToWideChar(CP_UTF8, 0, pSrc, cbSrc, ret, cwch))
    {
        SysFreeString(ret);//must clean up
        return NULL;
    }
    return ret;
}
// Test helper: reads the beginning of the file named by `str` (at most
// sizeof(sz) - 1 bytes) and converts it from UTF-8 to a BSTR via UTF8toUTF16.
// Returns NULL if the file cannot be opened, nothing could be read, or the
// conversion returns NULL. The caller releases the result with SysFreeString.
BSTR ModifiedProposedMethod(const wstring& str)
{
    FILE * pf = _wfopen(str.c_str(), L"rt");
    if (pf==NULL) return NULL;

    char sz[32000] = {0}; // big enough for very long lines
    // Read one byte less than the buffer holds so the zero-initialised last
    // byte always terminates the data, even when the file fills the buffer.
    size_t QtyRead = fread(sz, 1, sizeof(sz) - 1, pf);
    fclose(pf);

    if (QtyRead == 0) return NULL;
    return UTF8toUTF16(sz, (int)QtyRead);
}
// Test helper: reads the file named by `str` in fixed-size chunks, converts
// each chunk from UTF-8 to UTF-16 with MultiByteToWideChar and returns the
// concatenated result. Returns an empty string if the file cannot be opened.
wstring OriginalProposedMethod(const wstring& str)
{
    wstring ws;
    FILE * pf = _wfopen(str.c_str(), L"rt");
    if (pf==NULL) return ws;

    char sz[8192] = {0}; // big enough for very long lines
    wchar_t wsz[sizeof(sz)] = {0};
    const int wszCount = (int)(sizeof(wsz) / sizeof(wsz[0]));
    size_t carried = 0; // bytes of an unfinished UTF-8 sequence kept from the previous chunk
    size_t QtyRead = 0;
    while ( ( QtyRead = fread(sz + carried, 1, sizeof(sz) - carried, pf)) > 0)
    {
        const size_t avail = carried + QtyRead;

        // A chunk boundary can fall in the middle of a multi-byte sequence.
        // Count the trailing continuation bytes (10xxxxxx), then check whether
        // the lead byte in front of them announces more bytes than are present.
        size_t back = 0;
        while (back < 3 && back < avail &&
               ((unsigned char)sz[avail - 1 - back] & 0xC0) == 0x80)
        {
            ++back;
        }
        size_t complete = avail;
        if (back < avail)
        {
            const unsigned char lead = (unsigned char)sz[avail - 1 - back];
            const size_t need = (lead >= 0xF0) ? 4 : (lead >= 0xE0) ? 3 : (lead >= 0xC0) ? 2 : 1;
            if (need > back + 1)
                complete = avail - 1 - back; // hold the partial sequence back
        }

        if (complete > 0)
        {
            int t1 = MultiByteToWideChar(CP_UTF8, 0, sz, (int)complete, wsz, wszCount);
            // Append by count: wsz carries no terminator when the chunk fills it.
            if (t1 > 0) ws.append(wsz, t1);
        }

        // Move the held-back bytes to the front for the next read.
        carried = avail - complete;
        for (size_t i = 0; i < carried; ++i)
            sz[i] = sz[complete + i];
    }

    // A sequence still unfinished at end of file is converted as it stands.
    if (carried > 0)
    {
        int t1 = MultiByteToWideChar(CP_UTF8, 0, sz, (int)carried, wsz, wszCount);
        if (t1 > 0) ws.append(wsz, t1);
    }

    fclose(pf);
    return ws;
}
int main()
{
BSTR s1 = ModifiedProposedMethod(L"C
wstring s2x = OriginalProposedMethod(L"C
const wchar_t *s2 = s2x.c_str();
wcout << s1 << endl;
wcout << s2 << endl;
wcout << endl;
system("pause");
return 0;
}
ASKER
are you testing in VS 2003 or vs2005?
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Either convert the file name to UTF-8 (char* string)
or convert it to UNICODE (wchar_t* string)