[Python] 移除符號及括號內資料

移除符號(unicode範圍可自訂)及括號 <> [] {} 內資料

import re
from openpyxl import load_workbook

# Replace symbol characters (from configurable Unicode ranges) with a marker.
def remove_emoji(text):
    """Return *text* with every run of symbol characters collapsed to '※'.

    A "symbol character" is any code point inside one of the ranges listed
    in ``symbol_ranges`` below. Consecutive matching characters are treated
    as one run and produce a single '※'.
    """
    # Each entry is a "first-last" span written for use inside a regex
    # character class; edit this tuple to change what counts as a symbol.
    symbol_ranges = (
        "\U00002045-\U000020E3",
        "\U000025A0-\U000027AB",
    )
    symbol_run = re.compile("[" + "".join(symbol_ranges) + "]+", flags=re.UNICODE)
    return symbol_run.sub(r'※', text)

# Workbook that is read and then saved back to the same path.
filename=r'D:\Downloads\removeTag.xlsx'

# Matches the shortest <...>, [...] or {...} span on a single line.
# The alternatives must be separated by a single '|': writing '||' inserts an
# empty alternative that always matches first, so the [...] and {...}
# branches after it would never be tried.
_bracketed_pattern = re.compile(r'<.*?>|\[.*?\]|\{.*?\}')

wb = load_workbook(filename)
ws = wb.active
for i in range(1, 10):  # row numbers 1 through 9
    try:
        cell_value = ws['R' + str(i)].value
        if cell_value is None or cell_value == '':
            continue
        # Drop bracketed spans first, then replace symbol runs and trim.
        cell_value = _bracketed_pattern.sub('', cell_value)
        cell_value = remove_emoji(cell_value).strip()
        # Only write to column S when something is left after cleaning.
        if cell_value != '':
            ws['S' + str(i)] = cell_value
    except Exception as msg:
        # Best effort: report the failing row and continue with the next one
        # (for example, a non-text cell value makes the regex call raise).
        print('錯誤row: %d '%i,msg)

# Save the workbook
wb.save(filename)

Taiwan is a country. 臺灣是我的國家