CMSimple_XH 開発者ドキュメント
LinkChecker.php
1 <?php
2 
16 namespace XH;
17 
31 {
/**
 * Builds the placeholder markup for the link checker.
 *
 * The returned <div> has the id "xh_linkchecker", carries the validation
 * URL (script name plus "xh_do_validate") in its data-url attribute and
 * wraps the ajax-loader-bar.gif image from the core style folder.
 *
 * @return string HTML.
 */
public function prepare()
{
    global $sn, $pth, $tx;

    $validationUrl = $sn . '?&amp;xh_do_validate';
    $loaderImage = '<img src="' . $pth['folder']['corestyle']
        . 'ajax-loader-bar.gif" width="128" height="15" alt="'
        . $tx['link']['checking'] . '">';

    return '<div id="xh_linkchecker" data-url="' . $validationUrl . '">'
        . $loaderImage
        . '</div>';
}
53 
/**
 * Emits the link check report as a UTF-8 plain text response and
 * terminates the script.
 *
 * @return void
 */
public function doCheck()
{
    header('Content-Type: text/plain; charset=utf-8');
    $report = $this->checkLinks();
    echo $report;
    exit;
}
65 
/**
 * Checks all gathered links and returns the rendered report.
 *
 * Every link whose status is not exactly 200 is recorded per page index,
 * either under 'errors' (status is one of the failure statuses listed
 * below) or under 'caveats' (anything else).
 *
 * @return string The result of message() for the checked links.
 */
public function checkLinks()
{
    $linksPerPage = $this->gatherLinks();
    $failureStatuses = array(
        400, 404, 500, Link::STATUS_INTERNALFAIL,
        Link::STATUS_EXTERNALFAIL, Link::STATUS_CONTENT_NOT_FOUND,
        Link::STATUS_FILE_NOT_FOUND, Link::STATUS_ANCHOR_MISSING
    );
    $hints = array();
    foreach ($linksPerPage as $pageIndex => $pageLinks) {
        foreach ($pageLinks as $link) {
            $this->determineLinkStatus($link);
            if ($link->getStatus() === 200) {
                continue;
            }
            // the membership test deliberately stays a loose comparison
            if (in_array($link->getStatus(), $failureStatuses)) {
                $hints[$pageIndex]['errors'][] = $link;
            } else {
                $hints[$pageIndex]['caveats'][] = $link;
            }
        }
    }
    return $this->message($this->countLinks($linksPerPage), $hints);
}
92 
/**
 * Collects the links found in every entry of the global $c array.
 *
 * For each index below $cl the entry $c[index] is scanned for <a> elements.
 * "&amp;" in a link target is turned into "&", and a target starting with
 * "#" is prefixed with "?" and the matching entry of $u.
 *
 * @return array A list of Link objects per index (possibly empty lists).
 */
private function gatherLinks()
{
    global $c, $u, $cl;

    $pattern = '/<a.*?href=["]?([^"]*)["]?.*?>(.*?)<\/a>/is';
    $collected = array();
    for ($page = 0; $page < $cl; $page++) {
        $collected[$page] = array();
        preg_match_all($pattern, $c[$page], $found);
        foreach ($found[1] as $position => $href) {
            $href = str_replace('&amp;', '&', $href);
            if (strpos($href, '#') === 0) {
                // bare fragment: make it refer to the current entry
                $href = '?' . $u[$page] . $href;
            }
            $collected[$page][] = new Link($href, $found[2][$position]);
        }
    }
    return $collected;
}
124 
/**
 * Returns the total number of elements in all sub-arrays of $links.
 *
 * @param array $links An array of arrays (one list per page).
 *
 * @return int
 */
private function countLinks(array $links)
{
    $total = 0;
    foreach ($links as $pageLinks) {
        $total += count($pageLinks);
    }
    return $total;
}
136 
/**
 * Determines the status of a link and stores it in the link.
 *
 * Dispatches on the URL scheme: "http" links are checked externally,
 * "mailto" links get Link::STATUS_MAILTO, links without a scheme are
 * checked as internal links, and every other scheme yields
 * Link::STATUS_UNKNOWN.
 *
 * @param Link $link A link; its status is set as a side effect.
 *
 * @return void
 */
public function determineLinkStatus(Link $link)
{
    $parts = parse_url($link->getURL());
    if ($parts === false) {
        // parse_url() gives up on seriously malformed URLs; passing its
        // false result on to the array-typed check methods would be a
        // fatal error, so such links are reported as unclassifiable.
        $link->setStatus(Link::STATUS_UNKNOWN);
        return;
    }
    if (isset($parts['scheme'])) {
        switch ($parts['scheme']) {
            case 'http':
                $status = $this->checkExternalLink($parts);
                break;
            case 'mailto':
                $status = Link::STATUS_MAILTO;
                break;
            case '':
                $status = $this->checkInternalLink($parts);
                break;
            default:
                $status = Link::STATUS_UNKNOWN;
        }
    } else {
        $status = $this->checkInternalLink($parts);
    }
    $link->setStatus($status);
}
166 
/**
 * Checks an internal link and returns its status.
 *
 * The checks are made in this order:
 * - a link without a query string is OK if its (URL-decoded) path is a
 *   readable file, otherwise it fails;
 * - "sitemap" and (if a mailform address is configured) "mailform" are OK;
 * - the first query parameter is looked up in the page URLs (those of
 *   another language if the path points into a language folder); if the
 *   link has a fragment, a matching id/name attribute must exist in the
 *   page or in the template;
 * - a "download" query is OK if the named file exists in the downloads
 *   folder.
 *
 * @param array $test The URL components as returned by parse_url().
 *
 * @return mixed 200 on success, otherwise one of the Link::STATUS_*
 *               failure constants.
 */
private function checkInternalLink(array $test)
{
    global $c, $u, $cl, $pth, $cf;

    if (isset($test['path']) && !isset($test['query'])) {
        $filename = urldecode($test['path']);
        if (is_file($filename) && is_readable($filename)) {
            return 200;
        }
    }
    if (!isset($test['query'])) {
        return Link::STATUS_INTERNALFAIL;
    }

    list($query) = explode('&', $test['query']);
    if ($query === 'sitemap'
        || $query === 'mailform' && $cf['mailform']['email'] !== ''
    ) {
        return 200;
    }
    $contentLength = $cl;
    // [A-Za-z] rather than [A-z]: the latter also matches [ \ ] ^ _ and `
    if (isset($test['path'])
        && preg_match('/\/([A-Za-z]{2})\/[^\/]*/', $test['path'], $matches)
        && XH_isLanguageFolder($matches[1])
    ) {
        $lang = $matches[1];
    }
    if (isset($lang)) {
        $query = str_replace('/' . $lang . '/?', '', $query);
        $content = XH_readContents($lang);
        if (!$content) {
            return Link::STATUS_CONTENT_NOT_FOUND;
        }
        $urls = $content['urls'];
        $pages = $content['pages'];
        $contentLength = count($pages);
    } else {
        $urls = $u;
        $pages = $c;
    }
    for ($i = 0; $i < $contentLength; $i++) {
        if ($urls[$i] === $query) {
            if (!isset($test['fragment'])) {
                return 200;
            }
            if ($this->containsAnchor($pages[$i], $test['fragment'])) {
                return 200;
            }
            // check for anchor in template
            $template = file_get_contents($pth['file']['template']);
            if ($template !== false
                && $this->containsAnchor($template, $test['fragment'])
            ) {
                return 200;
            }
            return Link::STATUS_ANCHOR_MISSING;
        }
    }
    $parts = explode('=', $test['query']);
    $temp = array('download', '&download', '&amp;download');
    if (in_array($parts[0], $temp)) {
        // A missing or empty file name must not succeed just because the
        // downloads folder itself exists.
        if (isset($parts[1]) && $parts[1] !== ''
            && is_file($pth['folder']['downloads'] . $parts[1])
        ) {
            return 200;
        } else {
            return Link::STATUS_FILE_NOT_FOUND;
        }
    }
    return Link::STATUS_INTERNALFAIL;
}

/**
 * Returns whether the HTML has a tag with an id or name attribute
 * (double-quoted) whose value equals the fragment.
 *
 * @param string $html     HTML to search.
 * @param string $fragment A fragment identifier; matched literally.
 *
 * @return bool
 */
private function containsAnchor($html, $fragment)
{
    // (?:id|name) is a real alternation; the leading \s keeps attributes
    // like data-id or title from matching. preg_quote() prevents
    // fragments containing "/" or other metacharacters from breaking
    // (or altering) the pattern.
    $pattern = '/<[^>]*\s(?:id|name)\s*=\s*"'
        . preg_quote($fragment, '/') . '"/i';
    return (bool) preg_match($pattern, $html);
}
249 
/**
 * Checks an external link by means of a HEAD request and returns the
 * resulting status.
 *
 * @param array $parts The URL components as returned by parse_url().
 *
 * @return mixed The status returned by makeHeadRequest(), or
 *               Link::STATUS_EXTERNALFAIL if no request could be made.
 */
private function checkExternalLink(array $parts)
{
    set_time_limit(30);
    // parse_url() only returns the components that are present; a URL
    // such as "http:foo" has a scheme but no host to connect to.
    if (!isset($parts['host']) || $parts['host'] === '') {
        return Link::STATUS_EXTERNALFAIL;
    }
    $path = isset($parts['path']) ? $parts['path'] : '/';
    if (isset($parts['query'])) {
        $path .= "?" . $parts['query'];
    }
    $status = $this->makeHeadRequest($parts['host'], $path);
    return ($status !== false) ? $status : Link::STATUS_EXTERNALFAIL;
}
267 
/**
 * Sends a HEAD request to port 80 of the host and returns the HTTP
 * status code of the response.
 *
 * @param string $host A host name.
 * @param string $path The request target (path, optionally with query).
 *
 * @return int|false The status code, or false if the connection failed
 *                   or no valid HTTP status line was received in time.
 */
protected function makeHeadRequest($host, $path)
{
    $errno = $errstr = null;
    $socket = fsockopen($host, 80, $errno, $errstr, 5);
    if (!$socket) {
        return false;
    }
    // The fsockopen() timeout covers connecting only; without a stream
    // timeout a server that accepts but never answers would block the
    // read for default_socket_timeout seconds.
    stream_set_timeout($socket, 5);
    $request = "HEAD $path HTTP/1.1\r\nHost: $host\r\n"
        . "User-Agent: CMSimple_XH Link-Checker\r\n\r\n";
    $statusLine = false;
    if (fwrite($socket, $request) !== false) {
        // read the whole status line instead of relying on a single
        // fread() delivering exactly the first 12 bytes
        $statusLine = fgets($socket, 256);
    }
    fclose($socket);
    if ($statusLine === false
        || !preg_match('/^HTTP\/\d+(?:\.\d+)?\s+(\d{3})/', $statusLine, $matches)
    ) {
        // empty, truncated or non-HTTP response: report failure rather
        // than a meaningless status of 0
        return false;
    }
    return (int) $matches[1];
}
293 
/**
 * Renders a list item describing a link that was classified as an error.
 *
 * The item shows the link itself, its URL and an error text chosen by the
 * link's status; statuses without a dedicated text are rendered with the
 * 'ext_error_page' text followed by the status itself.
 *
 * @param Link $link A link.
 *
 * @return string HTML.
 */
public function reportError(Link $link)
{
    global $tx;

    $html = '<li>' . "\n" . '<b>' . $tx['link']['link'] . '</b>'
        . '<a href="' . $link->getURL() . '">' . $link->getText() . '</a>'
        . '<br>' . "\n"
        . '<b>' . $tx['link']['linked_page'] . '</b>' . $link->getURL()
        . '<br>' . "\n"
        . '<b>' . $tx['link']['error'] . '</b>';

    switch ($link->getStatus()) {
        case Link::STATUS_INTERNALFAIL:
        case Link::STATUS_CONTENT_NOT_FOUND:
            $detail = $tx['link']['int_error'];
            break;
        case Link::STATUS_ANCHOR_MISSING:
            $detail = $tx['link']['int_error_fragment'];
            break;
        case Link::STATUS_EXTERNALFAIL:
            $detail = $tx['link']['ext_error_domain'];
            break;
        default:
            $detail = $tx['link']['ext_error_page'] . '<br>' . "\n"
                . '<b>' . $tx['link']['returned_status'] . '</b>'
                . $link->getStatus();
    }

    return $html . $detail . "\n" . '</li>' . "\n";
}
330 
/**
 * Renders a list item describing a link that was classified as a caveat.
 *
 * The item shows the link itself, its URL and a note chosen by the link's
 * status: a text for mailto links, a text for unknown links, or the
 * returned status (preceded by a redirect note for 3xx statuses).
 *
 * @param Link $link A link.
 *
 * @return string HTML.
 */
public function reportNotice(Link $link)
{
    global $tx;

    $o = '<li>' . "\n" . '<b>' . $tx['link']['link'] . '</b>'
        . '<a href="' . $link->getURL() . '">' . $link->getText() . '</a>'
        . '<br>' . "\n"
        . '<b>' . $tx['link']['linked_page'] . '</b>'
        . $link->getURL() . '<br>' . "\n";
    switch ($link->getStatus()) {
        case Link::STATUS_MAILTO:
            $o .= $tx['link']['email'] . "\n";
            break;
        case Link::STATUS_UNKNOWN:
            $o .= $tx['link'][Link::STATUS_UNKNOWN] . "\n";
            break;
        default:
            if ($link->getStatus() >= 300 && $link->getStatus() < 400) {
                $o .= '<b>' . $tx['link']['error'] . '</b>'
                    . $tx['link']['redirect'] . '<br>' . "\n";
            }
            $o .= '<b>' . $tx['link']['returned_status'] . '</b>'
                . $link->getStatus() . "\n";
    }
    // close the list item explicitly, as reportError() does
    $o .= '</li>' . "\n";
    return $o;
}
364 
/**
 * Renders the overall report of a link check.
 *
 * The report starts with the number of checked links. Without hints it
 * ends with the 'check_ok' text; otherwise each page with problems gets a
 * heading linking to it, followed by a list of its errors and a list of
 * its caveats (each only if present).
 *
 * @param int   $checkedLinks The number of checked links.
 * @param array $hints        Problematic links, keyed by page index and
 *                            then by 'errors' and/or 'caveats'.
 *
 * @return string HTML.
 */
public function message($checkedLinks, array $hints)
{
    global $tx, $h, $u;

    $summaryKey = 'checked' . XH_numberSuffix($checkedLinks);
    $summary = sprintf($tx['link'][$summaryKey], $checkedLinks);
    $html = "\n" . '<p>' . $summary . '</p>' . "\n";

    if (count($hints) === 0) {
        return $html . '<p><b>' . $tx['link']['check_ok'] . '</b></p>' . "\n";
    }

    $html .= '<p><b>' . $tx['link']['check_errors'] . '</b></p>' . "\n"
        . '<p>' . $tx['link']['check'] . '</p>' . "\n";
    foreach ($hints as $page => $problems) {
        $html .= '<hr>' . "\n\n" . '<h4>' . $tx['link']['page']
            . '<a href="?' . $u[$page] . '">' . $h[$page] . '</a></h4>' . "\n";
        if (isset($problems['errors'])) {
            $items = array_map(array($this, 'reportError'), $problems['errors']);
            $html .= '<h5>' . $tx['link']['errors'] . '</h5>' . "\n"
                . '<ul>' . "\n"
                . implode('', $items)
                . '</ul>' . "\n" . "\n";
        }
        if (isset($problems['caveats'])) {
            $items = array_map(array($this, 'reportNotice'), $problems['caveats']);
            $html .= '<h5>' . $tx['link']['hints'] . '</h5>' . "\n"
                . '<ul>' . "\n"
                . implode('', $items)
                . '</ul>' . "\n";
        }
    }
    return $html;
}
412 }
XH_isLanguageFolder($name)
Definition: functions.php:1850
$j
Definition: cms.php:204
$i
Definition: cms.php:193
$cf
Definition: cms.php:272
XH_numberSuffix($count)
Definition: functions.php:2278
$c
Definition: cms.php:964
$u
Definition: cms.php:990
$o
Definition: cms.php:113
$temp
Definition: cms.php:182
$h
Definition: cms.php:977
$sn
Definition: cms.php:434
$tx
Definition: cms.php:363
$text
Definition: cms.php:698
$pth
Definition: cms.php:230
$cl
Definition: cms.php:918
XH_readContents($language=null)
Definition: functions.php:765