CMSimple_XH 開発者ドキュメント
utf8.php
[詳解]
1 <?php
2 
/**
 * Counts the characters (not the bytes) of a UTF-8 encoded string.
 *
 * Delegates to mb_strlen() with the encoding fixed to UTF-8.
 *
 * @param string $string The UTF-8 encoded string to measure.
 *
 * @return int The value reported by mb_strlen().
 */
function utf8_strlen($string)
{
    $charCount = mb_strlen($string, 'UTF-8');

    return $charCount;
}
32 
/**
 * Extracts a part of a UTF-8 encoded string, counted in characters.
 *
 * Delegates to mb_substr() with the encoding fixed to UTF-8; $offset and
 * $length are passed through unchanged.
 *
 * @param string   $string The UTF-8 encoded input string.
 * @param int      $offset The character position to start at.
 * @param int|null $length The maximum number of characters to extract;
 *                         null is handed to mb_substr() as is.
 *
 * @return string The value reported by mb_substr().
 */
function utf8_substr($string, $offset, $length = null)
{
    $part = mb_substr($string, $offset, $length, 'UTF-8');

    return $part;
}
46 
/**
 * Converts a UTF-8 encoded string to lower case.
 *
 * Delegates to mb_strtolower() with the encoding fixed to UTF-8.
 *
 * @param string $string The UTF-8 encoded input string.
 *
 * @return string The value reported by mb_strtolower().
 */
function utf8_strtolower($string)
{
    $lowered = mb_strtolower($string, 'UTF-8');

    return $lowered;
}
63 
/**
 * Converts a UTF-8 encoded string to upper case.
 *
 * Delegates to mb_strtoupper() with the encoding fixed to UTF-8.
 *
 * @param string $string The UTF-8 encoded input string.
 *
 * @return string The value reported by mb_strtoupper().
 */
function utf8_strtoupper($string)
{
    $raised = mb_strtoupper($string, 'UTF-8');

    return $raised;
}
80 
/**
 * Finds the first occurrence of a substring in a UTF-8 encoded string.
 *
 * Delegates to mb_strpos() with the encoding fixed to UTF-8, so positions
 * are counted in characters rather than bytes.
 *
 * @param string $haystack The UTF-8 encoded string to search in.
 * @param string $needle   The UTF-8 encoded string to search for.
 * @param int    $offset   The character position to start searching at.
 *
 * @return int|false The value reported by mb_strpos(): the character
 *                   position of the match, or false if there is none.
 */
function utf8_strpos($haystack, $needle, $offset = 0)
{
    $position = mb_strpos($haystack, $needle, $offset, 'UTF-8');

    return $position;
}
95 
/**
 * Finds the first case-insensitive occurrence of a substring in a UTF-8
 * encoded string.
 *
 * Delegates to mb_stripos() with the encoding fixed to UTF-8, so positions
 * are counted in characters rather than bytes.
 *
 * @param string $haystack The UTF-8 encoded string to search in.
 * @param string $needle   The UTF-8 encoded string to search for.
 * @param int    $offset   The character position to start searching at.
 *
 * @return int|false The value reported by mb_stripos(): the character
 *                   position of the match, or false if there is none.
 */
function utf8_stripos($haystack, $needle, $offset = 0)
{
    $position = mb_stripos($haystack, $needle, $offset, 'UTF-8');

    return $position;
}
110 
/**
 * Returns a UTF-8 encoded string with its first character in upper case.
 *
 * The string is split with the multibyte string functions instead of a
 * /u regular expression. A regex match can fail (e.g. on malformed UTF-8),
 * which would leave the match array empty and trigger undefined-offset
 * warnings; splitting by character position has no such failure mode.
 *
 * @param string $string The UTF-8 encoded input string.
 *
 * @return string The input with only its first character upper-cased;
 *                the empty string if the input has no characters.
 */
function utf8_ucfirst($string)
{
    $length = mb_strlen($string, 'UTF-8');
    if (!$length) {
        return '';
    }
    $first = mb_substr($string, 0, 1, 'UTF-8');
    // An explicit length (instead of null) reliably means "up to the end";
    // for a single-character string this yields ''.
    $rest = mb_substr($string, 1, $length, 'UTF-8');
    return mb_strtoupper($first, 'UTF-8') . $rest;
}
130 
/**
 * Tells whether a string is valid UTF-8.
 *
 * A string of zero bytes counts as valid. For anything else a single
 * character is matched with the /u modifier: PCRE only matches when the
 * subject is valid UTF-8, so if the string is malformed anywhere, nothing
 * matches at all, even if it also contains valid sequences.
 *
 * @param string $string The string to check.
 *
 * @return bool Whether the string is valid UTF-8.
 */
function utf8_is_valid($string)
{
    // preg_match() yields 1 on a match, 0 or false otherwise.
    return strlen($string) === 0
        || preg_match('/^.{1}/us', $string) === 1;
}
150 
/**
 * Replaces every byte that is not part of a well-formed UTF-8 sequence.
 *
 * The string is consumed from left to right in byte mode (no /u modifier).
 * At each position the pattern matches either one well-formed UTF-8
 * sequence, which is copied to the result, or, failing that, a single
 * byte (second capture group), which is replaced by $replace.
 *
 * The scan advances the offset passed to preg_match() instead of cutting
 * the consumed prefix off the string on every iteration, which avoids
 * re-copying the remaining input for each matched sequence.
 *
 * @param string $string  The string to sanitize.
 * @param string $replace The replacement for each invalid byte.
 *
 * @return string The sanitized string.
 */
function utf8_bad_replace($string, $replace = '?')
{
    $sequence = '([\x00-\x7F]'                 // ASCII (including control chars)
        . '|[\xC2-\xDF][\x80-\xBF]'            // non-overlong 2-byte
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'        // excluding overlongs
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // straight 3-byte
        . '|\xED[\x80-\x9F][\x80-\xBF]'        // excluding surrogates
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'     // planes 1-3
        . '|[\xF1-\xF3][\x80-\xBF]{3}'         // planes 4-15
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'     // plane 16
        . '|(.{1}))';                          // invalid byte
    $pattern = '/' . $sequence . '/S';
    $result = '';
    $offset = 0;
    $length = strlen($string);
    // Every alternative consumes at least one byte, and some alternative
    // always matches at the current offset, so the loop always advances.
    while ($offset < $length
        && preg_match($pattern, $string, $matches, 0, $offset)
    ) {
        // Group 2 is only present when the "invalid byte" fallback matched.
        if (!isset($matches[2])) {
            $result .= $matches[0];
        } else {
            $result .= $replace;
        }
        $offset += strlen($matches[0]);
    }
    return $result;
}
utf8_substr($string, $offset, $length=null)
Definition: utf8.php:42
utf8_strtolower($string)
Definition: utf8.php:59
utf8_ucfirst($string)
Definition: utf8.php:118
utf8_strlen($string)
Definition: utf8.php:28
utf8_strtoupper($string)
Definition: utf8.php:76
utf8_strpos($haystack, $needle, $offset=0)
Definition: utf8.php:91
utf8_is_valid($string)
Definition: utf8.php:139
utf8_bad_replace($string, $replace='?')
Definition: utf8.php:167
utf8_stripos($haystack, $needle, $offset=0)
Definition: utf8.php:106