📄 正在查看:twcms/kongphp/ext/utf8.class.php
1<?php
2/**
3 * Copyright (C) 2013-2014 www.kongphp.com All rights reserved.
4 * Licensed http://www.gnu.org/licenses/lgpl.html
5 * Author: wuzhaohuan <kongphp@gmail.com>
6 * @link http://www.xiuno.com/
7 */
8
// Decide once whether the multibyte (mbstring) code paths can be used.
// The flag is only defined here when nothing earlier has defined it.
if(!defined('FRAMEWORK_UTF8')) {
	// TRUE when the mbstring extension is loaded, FALSE otherwise.
	define('FRAMEWORK_UTF8', extension_loaded('mbstring'));

	if(FRAMEWORK_UTF8) {
		// Make UTF-8 the default encoding for mb_* calls.
		mb_internal_encoding('UTF-8');
	}
}
17
/**
 * UTF-8 aware string helpers.
 *
 * Methods that branch on the FRAMEWORK_UTF8 constant use the mbstring
 * functions when it is truthy and a pure-PHP (PCRE / byte level) fallback
 * otherwise.
 */
class utf8{
	/**
	 * UTF-8 aware substr(): offsets and lengths are counted in characters.
	 *
	 * @param string   $str    Input string.
	 * @param int      $offset Start position; negative counts from the end.
	 * @param int|null $length Number of characters; negative stops that many
	 *                         characters before the end; NULL means "to the end".
	 * @return string The selected part, or '' when nothing is selected or the
	 *                fallback cannot parse the input as UTF-8.
	 */
	public static function substr($str, $offset, $length = NULL) {
		if(FRAMEWORK_UTF8) {
			if($length === NULL) {
				// Older PHP releases treat a NULL length as 0, so pass an
				// explicit "rest of the string" length instead.
				$length = mb_strlen($str, 'UTF-8');
			}
			return mb_substr($str, $offset, $length, 'UTF-8');
		}

		$str = (string)$str;

		if(self::is_ascii($str)) {
			$part = ($length === NULL) ? substr($str, $offset) : substr($str, $offset, $length);
			// Older PHP returns FALSE for out-of-range offsets; normalise to ''
			// so every code path of this method returns a string.
			return (string)$part;
		}

		$strlen = self::strlen($str);
		$offset = (int)$offset;
		if($offset < 0) {
			$offset = max(0, $strlen + $offset);
		}
		$length = ($length === NULL) ? NULL : (int)$length;

		// Nothing can be selected.
		if($length === 0 || $offset >= $strlen || ($length !== NULL && $length < 0 && $length <= $offset - $strlen)) {
			return '';
		}

		// The whole string is selected.
		if($offset == 0 && ($length === NULL || $length >= $strlen)) {
			return $str;
		}

		// Build a regex that skips $offset characters and captures the wanted part.
		$regex = '^'.self::repeat_any($offset);

		if($length === NULL) {
			$regex .= '(.*)';
		}elseif($length > 0) {
			$regex .= '('.self::repeat_any(min($strlen - $offset, $length)).')';
		}else{
			// Negative length: capture greedily but leave -$length characters behind.
			$regex .= '(.*)'.self::repeat_any(-$length);
		}

		// preg_match() does not return 1 when the subject is not valid UTF-8.
		if(preg_match('/'.$regex.'/us', $str, $matches) !== 1) {
			return '';
		}
		return $matches[1];
	}

	/**
	 * Build a pattern fragment matching exactly $count characters.
	 *
	 * The count is split into chunks of 65535 so that no single {n}
	 * quantifier exceeds that value.
	 *
	 * @param int $count Number of characters to match (>= 0).
	 * @return string Pattern fragment; '' when $count is 0.
	 */
	private static function repeat_any($count) {
		$chunks = (int)($count / 65535);
		$rest = $count % 65535;

		$pattern = '';
		if($chunks > 0) {
			$pattern .= '(?:.{65535}){'.$chunks.'}';
		}
		if($rest > 0) {
			$pattern .= '.{'.$rest.'}';
		}
		return $pattern;
	}

	/**
	 * Cut a UTF-8 string by display width, appending $more when the result is
	 * shorter (in bytes) than the input.
	 *
	 * Widths: 1- and 2-byte characters count as 1, 3- and 4-byte characters
	 * count as 2. Copying stops once the accumulated width reaches $len, so a
	 * wide character may overshoot $len by one. Bytes that do not form a
	 * well-formed sequence (stray continuation bytes, truncated sequences,
	 * invalid lead bytes) are dropped one at a time, so valid bytes that
	 * follow them are kept.
	 *
	 * @param string $s    Input string.
	 * @param int    $len  Maximum display width.
	 * @param string $more Suffix appended when the output is shorter than the input.
	 * @return string
	 */
	public static function cutstr_cn($s, $len, $more = '...') {
		$s = (string)$s;
		$n = strlen($s);
		$r = '';
		$rlen = 0;
		$i = 0;

		while($i < $n && $rlen < $len) {
			$ord = ord($s[$i]);

			// Sequence size and display width are derived from the lead byte.
			if($ord < 0x80) {
				$size = 1;
				$width = 1;
			}elseif($ord >= 0xC2 && $ord <= 0xDF) {
				$size = 2;
				$width = 1;
			}elseif($ord >= 0xE0 && $ord <= 0xEF) {
				$size = 3;
				$width = 2;
			}elseif($ord >= 0xF0 && $ord <= 0xF4) {
				$size = 4;
				$width = 2;
			}else{
				// Stray continuation byte or invalid lead byte: discard it.
				$i++;
				continue;
			}

			// Every following byte of the sequence must look like 10xxxxxx.
			$valid = ($i + $size <= $n);
			for($k = 1; $valid && $k < $size; $k++) {
				$valid = ((ord($s[$i + $k]) & 0xC0) === 0x80);
			}
			if(!$valid) {
				// Discard only the lead byte; the next bytes are re-examined.
				$i++;
				continue;
			}

			$r .= substr($s, $i, $size);
			$rlen += $width;
			$i += $size;
		}

		if($n > strlen($r)) {
			$r .= $more;
		}

		return $r;
	}

	/**
	 * Safe truncation, intended to prevent SQL injection: cut the string like
	 * substr() and make sure the result does not end with a dangling backslash.
	 *
	 * A trailing run with an odd number of backslashes loses one backslash;
	 * an even run is left untouched, because removing one from it would
	 * create a dangling backslash.
	 *
	 * @param string   $str    Input string.
	 * @param int      $offset Start position (see substr()).
	 * @param int|null $length Length (see substr()).
	 * @return string
	 */
	public static function safe_substr($str, $offset, $length = NULL) {
		$str = (string)self::substr($str, $offset, $length);

		// Number of backslashes at the very end of the string.
		$trailing = strlen($str) - strlen(rtrim($str, '\\'));
		if($trailing % 2 === 1) {
			// Cast because older PHP may return FALSE for an empty result.
			$str = (string)substr($str, 0, -1);
		}
		return $str;
	}

	/**
	 * Tell whether a string consists only of bytes in the range 0x00-0x7F.
	 *
	 * @param string $str Input string.
	 * @return bool
	 */
	public static function is_ascii($str) {
		return !preg_match('/[^\x00-\x7F]/S', $str);
	}

	/**
	 * UTF-8 aware strlen(): returns the number of characters.
	 *
	 * @param string $str Input string.
	 * @return int
	 */
	public static function strlen($str) {
		if(FRAMEWORK_UTF8) {
			// Explicit encoding, consistent with substr().
			return mb_strlen($str, 'UTF-8');
		}

		$str = (string)$str;
		if(self::is_ascii($str)) {
			return strlen($str);
		}

		// In UTF-8 every character has exactly one byte that is not a
		// continuation byte (10xxxxxx), so strip those and count what is left.
		return strlen(preg_replace('/[\x80-\xBF]/', '', $str));
	}
}
133