// Read every line of the file, decoding its bytes as ISO-8859-1 (Latin-1).
string[] AllLines = File.ReadAllLines(FileName,Encoding.GetEncoding("iso-8859-1"));
// Print the display name of the EncodingAscii object.
// NOTE(review): EncodingAscii is declared outside this snippet; confirm which encoding it actually holds.
Console.WriteLine("Encoding is: {0}", EncodingAscii.EncodingName);
=>
Encoding is: Unicode (UTF-8)
/// <summary>
/// Runs Ude's <c>CharsetDetector</c> over the file named by the first
/// command-line argument and prints the detected charset and confidence,
/// or a failure message when no charset is reported.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the path of the file to inspect.</param>
public static void Main(String[] args)
{
    // Guard: without a path argument, args[0] would throw IndexOutOfRangeException.
    if (args == null || args.Length == 0) {
        Console.Error.WriteLine("Usage: <program> <filename>");
        return;
    }

    string filename = args[0];
    using (FileStream fs = File.OpenRead(filename)) {
        Ude.CharsetDetector cdet = new Ude.CharsetDetector();
        // Feed the whole stream, then signal end-of-data before reading the result.
        cdet.Feed(fs);
        cdet.DataEnd();
        // A null Charset is treated as "detection failed".
        if (cdet.Charset != null) {
            Console.WriteLine("Charset: {0}, confidence: {1}",
                cdet.Charset, cdet.Confidence);
        } else {
            Console.WriteLine("Detection failed.");
        }
    }
}
intimate° intimateý & tête-à-tête
Saved as UTF-8, I got this: intimate� intimate� & t�te-�-t�te
using System;
using System.IO;
using System.Text;
namespace FileEncoding
{
    /// <summary>
    /// Console experiment: reads one text file under two different decodings
    /// and dumps, in hex, the UTF-8 bytes each decoded string encodes to.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads <c>C:\Temp\UTFtest.txt</c> first as ISO-8859-1 (also writing
        /// that text to <c>C:\Temp\UTFtestOut.txt</c>), then with
        /// <see cref="File.ReadAllText(string)"/>'s default decoding, printing
        /// the text and its UTF-8 bytes after each read.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Hello World!");

            string inputPath = @"C:\Temp\UTFtest.txt";

            // Pass 1: decode explicitly as ISO-8859-1, write the text back out, then dump it.
            string decodedText = File.ReadAllText(inputPath, Encoding.GetEncoding("iso-8859-1"));
            File.WriteAllText(@"C:\Temp\UTFtestOut.txt", decodedText);
            Console.WriteLine("UTFtestTxt iso-8859-1 {0}", decodedText);
            UTF8Test(decodedText);

            // Pass 2: same file, no encoding argument, then dump it.
            decodedText = File.ReadAllText(inputPath);
            Console.WriteLine("UTFtestTxt Default {0}", decodedText);
            UTF8Test(decodedText);
        }

        /// <summary>
        /// Encodes the text to UTF-8 twice, once with a default
        /// <see cref="UTF8Encoding"/> and once with one constructed as
        /// <c>new UTF8Encoding(false, true)</c>, and prints each byte
        /// sequence in hex.
        /// </summary>
        /// <param name="UTFtestTxt">The already-decoded text to encode.</param>
        private static void UTF8Test(string UTFtestTxt)
        {
            char[] characters = UTFtestTxt.ToCharArray();

            UTF8Encoding lenientEncoder = new UTF8Encoding();
            UTF8Encoding strictEncoder = new UTF8Encoding(false, true);

            // The default encoder is not expected to throw here.
            byte[] encoded = lenientEncoder.GetBytes(characters);
            ShowArray(encoded);
            Console.WriteLine();

            try
            {
                // The strict encoder reports characters it cannot encode by
                // throwing EncoderFallbackException, which is caught below.
                encoded = strictEncoder.GetBytes(characters);
                ShowArray(encoded);
            }
            catch (EncoderFallbackException fallbackError)
            {
                Console.WriteLine(
                    "{0} exception\nMessage:\n{1}",
                    fallbackError.GetType().Name,
                    fallbackError.Message);
            }
        }

        /// <summary>
        /// Writes every element of the array to the console using the
        /// <c>X2</c> format followed by a space, then ends the line.
        /// </summary>
        /// <param name="theArray">The array whose elements are printed.</param>
        public static void ShowArray(Array theArray)
        {
            foreach (object element in theArray)
            {
                Console.Write("{0:X2} ", element);
            }

            Console.WriteLine();
        }
    }
}
Hello World!
UTFtestTxt iso-8859-1 intimate° intimatey & tête-à-tête
69 6E 74 69 6D 61 74 65 C2 B0 20 69 6E 74 69 6D 61 74 65 C3 BD 20 26 20 74 C3 AA 74 65 2D C3 A0 2D 74 C3 AA 74 65
69 6E 74 69 6D 61 74 65 C2 B0 20 69 6E 74 69 6D 61 74 65 C3 BD 20 26 20 74 C3 AA 74 65 2D C3 A0 2D 74 C3 AA 74 65
UTFtestTxt Default intimate? intimate? & t?te-?-t?te
69 6E 74 69 6D 61 74 65 EF BF BD 20 69 6E 74 69 6D 61 74 65 EF BF BD 20 26 20 74 EF BF BD 74 65 2D EF BF BD 2D 74 EF BF BD 74 65
69 6E 74 69 6D 61 74 65 EF BF BD 20 69 6E 74 69 6D 61 74 65 EF BF BD 20 26 20 74 EF BF BD 74 65 2D EF BF BD 2D 74 EF BF BD 74 65
// Load the raw bytes so the decoding step below sees exactly what is on disk.
byte[] bytes = System.IO.File.ReadAllBytes(@"H:\Temp\UTFtest.txt");
try
{
    // Strict UTF-8: no BOM emitted on encode, and invalid byte sequences throw.
    UTF8Encoding strictUtf8 = new UTF8Encoding(
        encoderShouldEmitUTF8Identifier: false,
        throwOnInvalidBytes: true);
    string decoded = strictUtf8.GetString(bytes);
}
catch (Exception failure)
{
    // Report the exception type and its message.
    Console.WriteLine(
        "{0} exception\nMessage:\n{1}",
        failure.GetType().Name,
        failure.Message);
}
Received this: Unable to translate bytes [B0] at index 8 from specified code page to Unicode.
// Read the file, decoding its bytes as ISO-8859-1 (Latin-1).
string OxfordTxt = File.ReadAllText(@"C:\Temp\UTFtestIn.txt", Encoding.GetEncoding("iso-8859-1"));
// Write the text back out; no encoding argument is given, so File.WriteAllText's default is used.
// NOTE(review): the author expected the output to match the input. If the input file is
// actually UTF-8, decoding it as ISO-8859-1 above would already have altered the text - confirm.
File.WriteAllText(@"C:\Temp\UTFtestOut.txt", OxfordTxt);
affect° => affect°
affectý => affectý
Open in new window
You only need to override or specify an encoding when the file is in a format without marker bytes and/or .NET cannot detect the encoding.