摘要:拆解 DICT dictionary
/// <summary>
/// Dumps every record of the dictionary index into "21data.txt", one line per record.
/// </summary>
/// <remarks>
/// The index file is read as a sequence of records: the headword bytes up to a 0x00
/// terminator, followed by two 4-byte fields that are decoded with word_offset and
/// word_length and then used to fetch the definition from the .dict file. Each output
/// line is the headword, a literal backslash-n separator, and the definition text.
/// </remarks>
/// <exception cref="EndOfStreamException">
/// The index file ends in the middle of a record's offset/length fields.
/// </exception>
private void separate()
{
    // using blocks guarantee that every handle is released, even when a read or write throws.
    using (FileStream objFsIdx = new FileStream("21shijishuangxiangcidian-big5.idx", FileMode.Open, FileAccess.Read))
    using (BinaryReader objBrIdx = new BinaryReader(objFsIdx))
    using (FileStream objFsDict = new FileStream("21shijishuangxiangcidian-big5.dict", FileMode.Open, FileAccess.Read))
    using (BinaryReader objBrDict = new BinaryReader(objFsDict))
    using (TextWriter objTW = new StreamWriter("21data.txt"))
    {
        long idxLength = objFsIdx.Length;

        // Headword bytes collected so far; grown on demand so long headwords cannot overflow it.
        int[] intArray = new int[100];
        int intPoint = 0;

        while (objFsIdx.Position < idxLength)
        {
            int intNow = objBrIdx.ReadByte();
            if (intNow != 0)
            {
                if (intPoint == intArray.Length)
                {
                    Array.Resize(ref intArray, intArray.Length * 2);
                }
                intArray[intPoint] = intNow;
                intPoint += 1;
                continue;
            }

            // 0x00 terminates the headword; the next eight bytes are the offset and length fields.
            byte[] offsetBytes = objBrIdx.ReadBytes(4);
            byte[] lengthBytes = objBrIdx.ReadBytes(4);
            if (offsetBytes.Length < 4 || lengthBytes.Length < 4)
            {
                // ReadBytes returns a short array at end of file; decoding it would yield a bogus value.
                throw new EndOfStreamException("Index file is truncated in the middle of a record.");
            }

            int intOffest = word_offset(offsetBytes);
            int intLength = word_length(lengthBytes);

            // NOTE(review): word_str turns each byte into one char, so headwords stored in a
            // multi-byte encoding will not be decoded correctly - confirm the index encoding.
            objTW.WriteLine(word_str(intArray, intPoint) + "\\n" + getvocabularyinformation(objBrDict, intOffest, intLength));

            // Start collecting the next headword. The previous version broke out of the loop
            // here, which limited the output to the first record only.
            intPoint = 0;
        }
    }
}
/// <summary>
/// Builds a string from the first <paramref name="intEnd"/> values of
/// <paramref name="intArray"/>, converting each non-zero value to a single character.
/// </summary>
/// <param name="intArray">Character codes; zero entries are skipped.</param>
/// <param name="intEnd">Number of leading elements to examine (exclusive upper index).</param>
/// <returns>The concatenated characters, or an empty string when nothing qualifies.</returns>
/// <remarks>
/// NOTE(review): every value becomes exactly one char, so a byte sequence in a multi-byte
/// encoding is not decoded as such here.
/// </remarks>
private String word_str(int[] intArray, int intEnd)
{
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();
    int index = 0;
    while (index < intEnd)
    {
        int code = intArray[index];
        index++;
        if (code == 0)
        {
            continue;
        }
        buffer.Append(Convert.ToChar(code));
    }
    return buffer.ToString();
}
/// <summary>
/// Decodes a big-endian integer (first byte most significant) from one to four bytes.
/// </summary>
/// <param name="byteArray">The raw bytes of the offset field.</param>
/// <returns>
/// The decoded value. A four-byte input with the top bit set yields a negative number,
/// because the bits are reinterpreted as a signed 32-bit integer.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="byteArray"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="byteArray"/> is empty or longer than four bytes.
/// </exception>
private int word_offset(byte[] byteArray)
{
    if (byteArray == null)
    {
        throw new ArgumentNullException("byteArray");
    }
    if (byteArray.Length == 0 || byteArray.Length > 4)
    {
        throw new ArgumentOutOfRangeException("byteArray", "Expected between 1 and 4 bytes.");
    }

    // Shift-and-or fold replaces the former hex-string round trip: no allocations, same result.
    uint offset = 0;
    foreach (byte b in byteArray)
    {
        offset = (offset << 8) | b;
    }
    return unchecked((int)offset);
}
/// <summary>
/// Decodes a big-endian integer (first byte most significant) from one to four bytes.
/// </summary>
/// <param name="byteArray">The raw bytes of the length field.</param>
/// <returns>
/// The decoded value. A four-byte input with the top bit set yields a negative number,
/// because the bits are reinterpreted as a signed 32-bit integer.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="byteArray"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="byteArray"/> is empty or longer than four bytes.
/// </exception>
private int word_length(byte[] byteArray)
{
    if (byteArray == null)
    {
        throw new ArgumentNullException("byteArray");
    }
    if (byteArray.Length == 0 || byteArray.Length > 4)
    {
        throw new ArgumentOutOfRangeException("byteArray", "Expected between 1 and 4 bytes.");
    }

    // Accumulate one byte at a time instead of formatting and re-parsing a hex string.
    uint length = 0;
    for (int i = 0; i < byteArray.Length; i++)
    {
        length = (length << 8) | byteArray[i];
    }
    return unchecked((int)length);
}
/// <summary>
/// Reads <paramref name="intL"/> bytes starting at absolute position <paramref name="intO"/>
/// of the reader's underlying stream and returns them decoded as UTF-8, with every line
/// feed replaced by the two-character sequence backslash + n.
/// </summary>
/// <param name="objBR">Reader over a seekable stream; its position is moved by this call.</param>
/// <param name="intO">Absolute byte offset at which to start reading.</param>
/// <param name="intL">Number of bytes to read.</param>
/// <returns>
/// The decoded, escaped text. Fewer bytes than requested are used when the stream ends
/// early, and an empty string is returned when the offset lies at or past the end.
/// </returns>
private String getvocabularyinformation(BinaryReader objBR, int intO, int intL)
{
    // Always seek to the requested offset. The previous guard tested the reader's
    // leftover position, so after any read that ended at end-of-stream every later
    // lookup returned an empty string even for a valid offset.
    objBR.BaseStream.Position = intO;
    byte[] objBytes = objBR.ReadBytes(intL);
    String strX = System.Text.Encoding.UTF8.GetString(objBytes);
    return strX.Replace("\n", "\\n");
}
其中 offset 及 length 可以用位元位移(bit shift)的方式來計算;若直接把位元組轉成 char 輸出,word_str 就可以省了。
我把 separate() 改了一下,getvocabularyinformation() 也省掉了,你看看……

02

03

04

05

06

07

08

09

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35
