匯入資料時,需要錯的髒資料處理中,
一定會過到的空字符過濾,
明明看起來一樣,欄位比對的結果卻不相同!
看起來一樣,資料內容的長度卻不相同,
char(14) char(15)? char(3) 資料裡面怎麼會有這個
先來了解 char(14) 是什麼東東
-----------------------------------------------------------------------------------------------------
ASCII Table and ASCII Code ( 0 - 127 ) Decimal Octal Hex Binairy Character ------- ----- ---- -------- ---------------------------- 000 000 0x00 00000000 NUL (Null char.) 001 001 0x01 00000001 SOH (Start of Header) 002 002 0x02 00000010 STX (Start of Text) 003 003 0x03 00000011 ETX (End of Text) 004 004 0x04 00000100 EOT (End of Transmission) 005 005 0x05 00000101 ENQ (Enquiry) 006 006 0x06 00000110 ACK (Acknowledgment) 007 007 0x07 00000111 BEL (Bell) 008 010 0x08 00001000 BS (Backspace) 009 011 0x09 00001001 HT (Horizontal Tab) 010 012 0x0A 00001010 LF (Line Feed) 011 013 0x0B 00001011 VT (Vertical Tab) 012 014 0x0C 00001100 FF (Form Feed) 013 015 0x0D 00001101 CR (Carriage Return) 014 016 0x0E 00001110 SO (Shift Out) 015 017 0x0F 00001111 SI (Shift In) 016 020 0x10 00010000 DLE (Data Link Escape) 017 021 0x11 00010001 DC1 (XON)(Device Control 1) 018 022 0x12 00010010 DC2 (Device Control 2) 019 023 0x13 00010011 DC3 (XOFF)(Device Control 3) 020 024 0x14 00010100 DC4 (Device Control 4) 021 025 0x15 00010101 NAK (Negative Acknowledgement) 022 026 0x16 00010110 SYN (Synchronous Idle) 023 027 0x17 00010111 ETB (End of Trans. Block) 024 030 0x18 00011000 CAN (Cancel) 025 031 0x19 00011001 EM (End of Medium) 026 032 0x1A 00011010 SUB (Substitute) 027 033 0x1B 00011011 ESC (Escape) 028 034 0x1C 00011100 FS (File Separator) 029 035 0x1D 00011101 GS (Group Separator) ctrl 030 036 0x1E 00011110 RS (Request to Send)(Record Separator) code 031 037 0x1F 00011111 US (Unit Separator) ---------- ----- ---- -------- ---------------------------- prnt- 032 040 0x20 00100000 SP (Space) able 033 041 0x21 00100001 ! (exclamation mark) code 034 042 0x22 00100010 " (double quote) 035 043 0x23 00100011 # (number sign) 036 044 0x24 00100100 $ (dollar sign) 037 045 0x25 00100101 % (percent) 038 046 0x26 00100110 & (ampersand) 039 047 0x27 00100111 ' (single quote) 040 050 0x28 00101000 ( (left opening parenthesis) 041 051 0x29 00101001 ) (right closing parenthesis) 042 052 0x2A 00101010 * (asterisk) 043 053 0x2B 00101011 + (plus) 044 054 0x2C 00101100 , (comma) 045 055 0x2D 00101101 - (minus or dash) 046 056 0x2E 00101110 . (dot) 047 057 0x2F 00101111 / (forward slash) 048 060 0x30 00110000 0 049 061 0x31 00110001 1 050 062 0x32 00110010 2 051 063 0x33 00110011 3 052 064 0x34 00110100 4 053 065 0x35 00110101 5 054 066 0x36 00110110 6 055 067 0x37 00110111 7 056 070 0x38 00111000 8 057 071 0x39 00111001 9 058 072 0x3A 00111010 : (colon) 059 073 0x3B 00111011 ; (semi-colon) 060 074 0x3C 00111100 < (less than sign) 061 075 0x3D 00111101 = (equal sign) 062 076 0x3E 00111110 > (greater than sign) 063 077 0x3F 00111111 ? (question mark) 064 100 0x40 01000000 @ (AT symbol) 065 101 0x41 01000001 A 066 102 0x42 01000010 B 067 103 0x43 01000011 C 068 104 0x44 01000100 D 069 105 0x45 01000101 E 070 106 0x46 01000110 F 071 107 0x47 01000111 G 072 110 0x48 01001000 H 073 111 0x49 01001001 I 074 112 0x4A 01001010 J 075 113 0x4B 01001011 K 076 114 0x4C 01001100 L 077 115 0x4D 01001101 M 078 116 0x4E 01001110 N 079 117 0x4F 01001111 O 080 120 0x50 01010000 P 081 121 0x51 01010001 Q 082 122 0x52 01010010 R 083 123 0x53 01010011 S 084 124 0x54 01010100 T 085 125 0x55 01010101 U 086 126 0x56 01010110 V 087 127 0x57 01010111 W 088 130 0x58 01011000 X 089 131 0x59 01011001 Y 090 132 0x5A 01011010 Z 091 133 0x5B 01011011 [ (left opening bracket) 092 134 0x5C 01011100 \ (back slash) 093 135 0x5D 01011101 ] (right closing bracket) 094 136 0x5E 01011110 ^ (caret cirumflex) 095 137 0x5F 01011111 _ (underscore) 096 140 0x60 01100000 ` 097 141 0x61 01100001 a 098 142 0x62 01100010 b 099 143 0x63 01100011 c 100 144 0x64 01100100 d 101 145 0x65 01100101 e 102 146 0x66 01100110 f 103 147 0x67 01100111 g 104 150 0x68 01101000 h 105 151 0x69 01101001 i 106 152 0x6A 01101010 j 107 153 0x6B 01101011 k 108 154 0x6C 01101100 l 109 155 0x6D 01101101 m 110 156 0x6E 01101110 n 111 157 0x6F 01101111 o 112 160 0x70 01110000 p 113 161 0x71 01110001 q 114 162 0x72 01110010 r 115 163 0x73 01110011 s 116 164 0x74 01110100 t 117 165 0x75 01110101 u 118 166 0x76 01110110 v 119 167 0x77 01110111 w 120 170 0x78 01111000 x 121 171 0x79 01111001 y 122 172 0x7A 01111010 z 123 173 0x7B 01111011 { (left opening brace) 124 174 0x7C 01111100 | (vertical bar) 125 175 0x7D 01111101 } (right closing brace) 126 176 0x7E 01111110 ~ (tilde) 127 177 0x7F 01111111 DEL (delete)
A compact ASCII table.
Char Dec Oct Hex | Char Dec Oct Hex | Char Dec Oct Hex | Char Dec Oct Hex ------------------------------------------------------------------------------- (nul) 0 0000 0x00 | (sp) 32 0040 0x20 | @ 64 0100 0x40 | ` 96 0140 0x60 (soh) 1 0001 0x01 | ! 33 0041 0x21 | A 65 0101 0x41 | a 97 0141 0x61 (stx) 2 0002 0x02 | " 34 0042 0x22 | B 66 0102 0x42 | b 98 0142 0x62 (etx) 3 0003 0x03 | # 35 0043 0x23 | C 67 0103 0x43 | c 99 0143 0x63 (eot) 4 0004 0x04 | $ 36 0044 0x24 | D 68 0104 0x44 | d 100 0144 0x64 (enq) 5 0005 0x05 | % 37 0045 0x25 | E 69 0105 0x45 | e 101 0145 0x65 (ack) 6 0006 0x06 | & 38 0046 0x26 | F 70 0106 0x46 | f 102 0146 0x66 (bel) 7 0007 0x07 | ' 39 0047 0x27 | G 71 0107 0x47 | g 103 0147 0x67 (bs) 8 0010 0x08 | ( 40 0050 0x28 | H 72 0110 0x48 | h 104 0150 0x68 (ht) 9 0011 0x09 | ) 41 0051 0x29 | I 73 0111 0x49 | i 105 0151 0x69 (nl) 10 0012 0x0a | * 42 0052 0x2a | J 74 0112 0x4a | j 106 0152 0x6a (vt) 11 0013 0x0b | + 43 0053 0x2b | K 75 0113 0x4b | k 107 0153 0x6b (np) 12 0014 0x0c | , 44 0054 0x2c | L 76 0114 0x4c | l 108 0154 0x6c (cr) 13 0015 0x0d | - 45 0055 0x2d | M 77 0115 0x4d | m 109 0155 0x6d (so) 14 0016 0x0e | . 46 0056 0x2e | N 78 0116 0x4e | n 110 0156 0x6e (si) 15 0017 0x0f | / 47 0057 0x2f | O 79 0117 0x4f | o 111 0157 0x6f (dle) 16 0020 0x10 | 0 48 0060 0x30 | P 80 0120 0x50 | p 112 0160 0x70 (dc1) 17 0021 0x11 | 1 49 0061 0x31 | Q 81 0121 0x51 | q 113 0161 0x71 (dc2) 18 0022 0x12 | 2 50 0062 0x32 | R 82 0122 0x52 | r 114 0162 0x72 (dc3) 19 0023 0x13 | 3 51 0063 0x33 | S 83 0123 0x53 | s 115 0163 0x73 (dc4) 20 0024 0x14 | 4 52 0064 0x34 | T 84 0124 0x54 | t 116 0164 0x74 (nak) 21 0025 0x15 | 5 53 0065 0x35 | U 85 0125 0x55 | u 117 0165 0x75 (syn) 22 0026 0x16 | 6 54 0066 0x36 | V 86 0126 0x56 | v 118 0166 0x76 (etb) 23 0027 0x17 | 7 55 0067 0x37 | W 87 0127 0x57 | w 119 0167 0x77 (can) 24 0030 0x18 | 8 56 0070 0x38 | X 88 0130 0x58 | x 120 0170 0x78 (em) 25 0031 0x19 | 9 57 0071 0x39 | Y 89 0131 0x59 | y 121 0171 0x79 (sub) 26 0032 0x1a | : 58 0072 0x3a | Z 90 0132 0x5a | z 122 0172 0x7a (esc) 27 0033 0x1b | ; 59 0073 0x3b | [ 91 0133 0x5b | { 123 0173 0x7b (fs) 28 0034 0x1c | < 60 0074 0x3c | \ 92 0134 0x5c | | 124 0174 0x7c (gs) 29 0035 0x1d | = 61 0075 0x3d | ] 93 0135 0x5d | } 125 0175 0x7d (rs) 30 0036 0x1e | > 62 0076 0x3e | ^ 94 0136 0x5e | ~ 126 0176 0x7e (us) 31 0037 0x1f | ? 63 0077 0x3f | _ 95 0137 0x5f | (del)127 0177 0x7f
//-- JavaScript
var rawString = rawString.replace(/[\x00-\x20]/g, "");
//-- Java
String resultString = subjectString.replaceAll("[^\\x00-\\x20]", "");
// 總之就是把阿里阿雜會造成判斷錯誤的空字符,濾掉
// 取到 char(32) 是在我的案例中只需要過濾到這
在 MS-SQL中
resultString= replace(str,char(15),'')
來源資料:https://www.systutorials.com/4670/ascii-table-and-ascii-code/