摘要:Java版本 處理字幕檔
由於中文字幕檔可能採用不同的編碼格式(例如 UTF-8、UTF-16、GBK),用 Java 處理字幕檔有些麻煩:需先判斷檔案的編碼, 再去做讀取與處理, 輸出時也要注意編碼。最好找到編碼格式一樣的 srt 檔案, 否則先把檔案轉成同一種編碼後再做擷取, 會比較好實作
程式內容:
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
/**
 * Program entry point: builds a {@link GetSubtitle}, runs its extraction
 * pass, and then closes its readers.
 */
public class Subtitle {

    /**
     * Runs one extraction pass over the subtitle files configured inside
     * {@link GetSubtitle}.
     *
     * @param args command-line arguments; not used
     * @throws IOException if a subtitle file cannot be opened, read or closed
     */
    public static void main(final String[] args) throws IOException {
        final GetSubtitle extractor = new GetSubtitle();
        extractor.startOperation();
        extractor.stopOperation();
    }
}
/**
 * Walks an English subtitle file and, for every line containing ':' (used
 * as a lookup key), searches the Chinese subtitle file for an identical
 * line. When one is found, the English lines that follow are printed on a
 * single line and the Chinese lines that follow are printed one per line,
 * each up to and including the next empty line (or end of file).
 *
 * Both files are read as UTF-8.
 */
class GetSubtitle{
    /** English subtitle file. */
    public File fileEng;
    /** Chinese subtitle file. */
    public File fileCht;
    /** Underlying reader of the English file. */
    public InputStreamReader readEng;
    /** Underlying reader of the Chinese file; re-created on every lookup. */
    public InputStreamReader readCht;
    /** Buffered reader over {@link #readEng}. */
    public BufferedReader brEng;
    /** Buffered reader over {@link #readCht}; null until the first lookup. */
    public BufferedReader brCht;
    /** Not used by this class; kept so existing references still compile. */
    public StringBuilder sbEng;
    /** Not used by this class; kept so existing references still compile. */
    public StringBuilder sbCht;
    /** Most recently read line of the English / Chinese file. */
    public String strEng, strCht;

    /**
     * Creates an extractor over the default, hard-coded subtitle files.
     *
     * @throws IOException if the English file cannot be opened
     */
    GetSubtitle() throws IOException{
        initial();
    }

    /**
     * Creates an extractor over the given subtitle files.
     *
     * @param fileEng English subtitle file
     * @param fileCht Chinese subtitle file
     * @throws IOException if the English file cannot be opened
     */
    GetSubtitle(File fileEng, File fileCht) throws IOException{
        this.fileEng = fileEng;
        this.fileCht = fileCht;
        openEnglishReader();
    }

    /**
     * Points the extractor at the default files and opens the English one.
     *
     * @throws IOException if the English file cannot be opened
     */
    public void initial() throws IOException{
        fileEng = new File("C:\\Documents and Settings\\kent\\eclipse_work\\Subtitle\\Mission.Impossible_eng.txt");
        fileCht = new File("C:\\Documents and Settings\\kent\\eclipse_work\\Subtitle\\Mission.Impossible_cht.txt");
        openEnglishReader();
    }

    /** Opens (or re-opens) the English file, closing any reader opened earlier. */
    private void openEnglishReader() throws IOException{
        if(brEng != null){
            brEng.close();
        }
        readEng = new InputStreamReader (new FileInputStream(fileEng),"utf-8");
        brEng = new BufferedReader(readEng);
    }

    /**
     * Reads the English file to the end, starting a lookup in the Chinese
     * file for every line that contains ':'.
     *
     * @throws IOException if either file cannot be read
     */
    public void startOperation() throws IOException{
        while((strEng = brEng.readLine()) != null ){
            if(strEng.contains(":")){
                repeatReadCht();
            }
        }
    }

    /**
     * Re-opens the Chinese file and scans it from the top for a line equal
     * to the current English line; on the first match the entry is printed
     * via {@link #outputData()} and the scan stops.
     *
     * @throws IOException if the Chinese file cannot be opened or read
     */
    public void repeatReadCht() throws IOException{
        // Release the reader left over from the previous lookup; without this
        // every lookup would leak one open file handle.
        if(brCht != null){
            brCht.close();
        }
        readCht = new InputStreamReader (new FileInputStream(fileCht),"utf-8");
        brCht = new BufferedReader(readCht);
        while((strCht = brCht.readLine()) != null){
            if(strEng.equals(strCht)){
                outputData();
                break;
            }
        }
    }

    /**
     * Prints the entry that follows the current position of both readers:
     * English lines joined by spaces on one line, then a CRLF, then the
     * Chinese lines one per line. Each side stops after the first empty
     * line (which is itself printed) or at end of file.
     *
     * @throws IOException if either file cannot be read
     */
    public void outputData() throws IOException{
        while(strEng.length() != 0){
            if((strEng = brEng.readLine()) != null){
                System.out.print(strEng + " ");
            }else{
                break;
            }
        }
        System.out.print("\r\n");
        while(strCht.length() != 0 ){
            if((strCht = brCht.readLine()) != null){
                System.out.println(strCht);
            }else{
                break;
            }
        }
    }

    /**
     * Closes both readers. Safe to call when the Chinese file was never
     * opened, and the Chinese reader is closed even if closing the English
     * one fails.
     *
     * @throws IOException if closing a reader fails
     */
    public void stopOperation() throws IOException{
        try{
            if(brEng != null){
                brEng.close();
            }
        }finally{
            if(brCht != null){
                brCht.close();
            }
        }
    }
}
網路上判斷編碼的方法:
class Convert{
Convert(){
}
public String convertCodeAndGetText() {
File file = new File("C:\\Documents and Settings\\kent\\eclipse_work\\Subtitle\\Avengers_cht.txt");
BufferedReader reader;
String text = "";
try{
FileInputStream fis = new FileInputStream(file);
BufferedInputStream in = new BufferedInputStream(fis);
in.mark(4);
byte[] first3bytes = new byte[3];
in.read(first3bytes);//找到文字檔的前三個字節並自動判斷文字檔類型
in.reset();
if(first3bytes[0] == (byte) 0xEF && first3bytes[1] == (byte) 0xBB && first3bytes[2] == (byte) 0xBF) {// utf-8
reader = new BufferedReader(new InputStreamReader(in, "utf-8"));
System.out.println("utf-8");
}else if(first3bytes[0] == (byte) 0xFF && first3bytes[1] == (byte) 0xFE) {
reader = new BufferedReader(new InputStreamReader(in, "unicode"));
System.out.println("unicode");
}else if(first3bytes[0] == (byte) 0xFE && first3bytes[1] == (byte) 0xFF) {
reader = new BufferedReader(new InputStreamReader(in,"utf-16be"));
System.out.println("utf-16be");
}else if(first3bytes[0] == (byte) 0xFF && first3bytes[1] == (byte) 0xFF) {
reader = new BufferedReader(new InputStreamReader(in, "utf-16le"));
System.out.println("utf-16le");
}else{
reader = new BufferedReader(new InputStreamReader(in, "GBK"));
System.out.println("GBK");
}
String str = reader.readLine();
while(str != null){
text = text + str + "\n";
str = reader.readLine();
}
reader.close();
}catch(FileNotFoundException e) {
e.printStackTrace();
}catch(IOException e) {
e.printStackTrace();
}
return text;
}
}