移除符號(unicode範圍可自訂)及括號<> [] {}內資料
import re
from openpyxl import load_workbook
# Code-point ranges that remove_emoji() treats as symbols. Compiled once at
# import time; edit the ranges here to customize what gets replaced.
_SYMBOL_PATTERN = re.compile(
    "["
    "\U00002045-\U000020E3"
    "\U000025A0-\U000027AB"
    "]+"
)


def remove_emoji(text):
    """Replace each run of symbol characters in *text* with a single '※'.

    A symbol is any code point in U+2045..U+20E3 or U+25A0..U+27AB.
    Consecutive symbols collapse into one '※' marker, so despite the
    function's name the symbols are substituted rather than deleted.

    Args:
        text: The string to clean.

    Returns:
        A copy of *text* with every run of matching characters replaced
        by '※'.
    """
    return _SYMBOL_PATTERN.sub('※', text)
# Workbook that is read and then saved back in place with the cleaned text.
filename = r'D:\Downloads\removeTag.xlsx'

# Matches the shortest <...>, [...] or {...} span, delimiters included.
# The alternatives must be joined with a single "|": with "||" an empty
# alternative sits between them and matches at every position first, so the
# [...] and {...} alternatives were never reached.
_BRACKETED_PATTERN = re.compile(r'<.*?>|\[.*?\]|\{.*?\}')

wb = load_workbook(filename)
ws = wb.active

for i in range(1, 10):  # rows 1 through 9
    try:
        cell_value = ws['R' + str(i)].value
        if cell_value is None or cell_value == '':
            continue
        # Drop bracketed spans first, then replace symbol runs and trim.
        cell_value = _BRACKETED_PATTERN.sub('', cell_value)
        cell_value = remove_emoji(cell_value).strip()
        if cell_value != '':
            # Column S receives the cleaned text; it is left untouched when
            # nothing remains after cleaning.
            ws['S' + str(i)] = cell_value
    except Exception as msg:
        # Best effort: report the failing row (e.g. a non-text cell) and
        # keep processing the remaining rows.
        print('錯誤row: %d ' % i, msg)

# Save the workbook
wb.save(filename)
Taiwan is a country. 臺灣是我的國家