为了应对当前HTML格式和js脚本对老人机的不友好,我们需要处理一下某些小说网站,让它回到十几年前的样子,剔除大量CSS和JS,精简HTML标签,这样就能够让老人机快速、友好地看小说了。

目前下面的脚本就能实现这一点:它可以为老人机提供简单的浏览功能和必备的搜索功能,并且支持缓存。

简单说明

_remoteurl_:填写链接
_band_word_:删除关键词
_band_href_:删除URL包含该值的链接
_band_ele_:把某些没有替换掉的元素删除掉,这个设置的存在是由一个正则匹配引起的,一般不要修改。
关于缓存:在源码内搜索$keeptime即可找到相关代码。

快速配置

1. 基本配置

define处修改remoteurl和其他配置,注意看注释,别乱来。

2. 搜索配置

找到 if ($_GET['search']) : 这一行,下面有这样一行:

$html = c_get('https://search2.booktxt.net/s.php', 'post', 't=1&keyword='.urlencode(_iconv($_GET['search'], 'GBK')));

修改其中 c_get 的参数即可。
第一个参数是url,第二个是方式(post/get),第三个是传输过去的请求字符串,可能要用到转码功能,具体看对面页面的编码,用_iconv (脚本自构的函数) 转换。具体写什么需要手动抓取网站信息手动填好。

例子:

# define_remoteurl:  
define('_remoteurl_', 'https://www.biqutxt.com/');
# $_GET['search']后边:
$html = c_get('https://www.biqutxt.com/modules/article/search.php', 'post', 'searchtype=articlename&action=login&searchkey='.urlencode(_iconv($_GET['search'], 'GBK')));

若出现乱码,则可能为转码问题,也可能为原网页的问题,需排查,可以把URL上面的page参数进行urldecode+base64decode即得到源URL。

完整代码如下:

<?php
/*
author: foxnes/luuljh
*/
// Report everything except warnings and notices.
error_reporting(E_ALL & ~E_WARNING & ~E_NOTICE);
date_default_timezone_set("PRC");

// Site to proxy: must start with http(s):// and must end with "/".
define('_remoteurl_', 'https://m.booktxt.net/');
// Regex alternatives (separated by |, must not be empty) removed from the page text.
define('_band_word_', '字体:|顶点小说移动网(m\.booktxt\.net)|37小说网');
// Links whose markup matches one of these alternatives are dropped (same rules as above).
define('_band_href_', 'cnzz\.com|mybook\.php');
// Tag names that are stripped explicitly before the generic tag filter runs.
define('_band_ele_', 'link');

// One cache file per distinct query string.
$cachee = "cachepage/" . md5($_SERVER["QUERY_STRING"] . 'haha');

if (!file_exists("./cachepage/")) {
    mkdir('cachepage');
}

if (file_exists($cachee)) {
    // Requests with a "page" parameter are kept for 1 year (31536000 s),
    // everything else for 3 days (259200 s).
    $keeptime = $_GET['page'] ? 31536000 : 259200;
    $age = time() - filectime($cachee);
    if ($age < $keeptime) {
        // Still fresh: serve the stored copy and stop.
        echo file_get_contents($cachee);
        exit;
    }
    // Expired: discard it and fall through to regenerate the page.
    unlink($cachee);
}

// Buffer the whole page so it can be written to the cache once it is complete.
ob_start();

echo '<?xml version="1.0" encoding="UTF-8"?>';
?>
<!DOCTYPE html PUBLIC "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"
"http://www.openmobilealliance.org/tech/DTD/xhtml-mobile12.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title><?php
// The decoded "page" value is attacker-controllable query-string data; it must be
// HTML-escaped before it is printed, otherwise a crafted link injects markup
// into this page (and into the cached copy of it).
echo htmlspecialchars(
    'WT ' . ($_GET['page'] ? base64_decode($_GET['page']) : ''),
    ENT_QUOTES | ENT_SUBSTITUTE,
    'UTF-8'
);
?></title>
<meta name="viewport" content="width=device-width, minimum-scale=1.0, maximum-scale=2.0"/>
<style type="text/css">body{font-size: 13px;line-height: 19px} p{margin: 3px;} a+a{display: block}</style>
</head>
<body>

<a href="?">回到主页</a>
<br />

<form action="" method="get">
搜索小说:
<input type="text" name="search" />
<button type='submit'>搜索</button>
</form>
<?php

// Choose what to fetch: a search request, a proxied page, or the site home page.
if ($_GET['search']) :
$html = c_get('https://search2.booktxt.net/s.php', 'post', 't=1&keyword='.urlencode(_iconv($_GET['search'], 'GBK')));
elseif ($_GET['page']):
$html = c_get(base64_decode($_GET['page']));
else:
$html = c_get(_remoteurl_);
endif;

// The fetch result is expected to be array(body, headers), but a failed fetch
// may come back as a bare message string. Normalise both shapes so the code
// below never indexes into a string by accident.
if (is_array($html)) {
    $body   = isset($html[0]) ? (string) $html[0] : '';
    $header = isset($html[1]) ? (string) $html[1] : '';
} else {
    $body   = '';
    $header = (string) $html;
}

// Use the status code of the LAST status line (earlier ones belong to followed
// redirects). Matching the numeric code instead of the text "200 OK" also
// accepts HTTP/2 responses, whose status line carries no reason phrase.
$status = '';
if (preg_match_all('/^HTTP\/\d+(?:\.\d+)?\s+(\d{3})/mi', $header, $statuslines)) {
    $status = end($statuslines[1]);
}

if ($status !== '200') {
    echo '服务器可能出现了点问题噢(⊙o⊙)?';
    // Headers come from the remote server: escape them before display.
    echo "<br />返回:" . nl2br(htmlspecialchars($header, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'));
    // Never cache an error page.
    $cachee = false;
} else {
    $html = _iconv($body);
    // Drop <style>/<script> elements together with their content.
    $html = preg_replace('/<(style|script)[^>]*?>[\s\S]*?<\/\1>/i', '', $html);
    // Drop the explicitly banned tags that the whitelist filter below would let through.
    $html = preg_replace('/<(?:'._band_ele_.')[^>]*?>/i', '', $html);
    // Drop every tag whose name does not start with one of the whitelisted names.
    $html = preg_replace('/<(?!a|\/a|p|\/p|br|li|\/li|table|\/table|td|\/td|tr|\/tr)(?:[^>]*?)>/i', '', $html);
    // Strip presentational / scripting attributes.
    $html = preg_replace('/(id|class|title|style|target|alt|onclick)=("|\').*?\2/i', '', $html);
    // Remove javascript: links and links matching the banned-href list.
    $html = preg_replace('/<a[^>]*?=[^>]*?(?:javascript\:|'._band_href_.')[\s\S]*?<\/a>/i', '', $html);
    // Collapse whitespace and runs of &nbsp; into a single space.
    $html = preg_replace('/[\n\r\s]+|(&nbsp;)+/i', ' ', $html);
    // Remove banned words.
    $html = preg_replace('/'._band_word_.'/i', '', $html);
    // Remove empty paragraphs and anchors left behind by the steps above.
    $html = preg_replace('/<p[^>]*?><\/p>|<a >.*?<\/a>|<a[^>]*?><\/a>/i', '', $html);

    // Rewrite every href so that it points back at this script (?page=<base64 url>).
    preg_match_all('/<a[^>]*?href=("|\')([^>]*?)\1/i', $html, $links);
    $rep = [];
    $rem = [];
    foreach ($links[2] as $key => $value) {
        // Very short targets are left untouched.
        if (!(strlen($value) > 5)) continue;
        $qt = $links[1][$key];
        $rep[] = $qt . $value . $qt;
        $rem[] = $qt . "?page=".urlencode(base64_encode(rel2abs($value))) . $qt;
    }
    // Only replace when at least one link qualified (the old count($links) test was always true).
    if (!empty($rep)) {
        $html = str_replace($rep, $rem, $html);
    }
    echo $html;
}
?>

<br />
<a href="?">回到主页</a>
<small>[<?= date("y-m-d H:i:s") ?>]</small>

</body>
</html><?php

// Pull the finished page out of the output buffer, send it to the client,
// and store it unless caching was switched off for this request.
$html = ob_get_contents();
ob_end_clean();

print $html;

if ($cachee) {
    file_put_contents($cachee, $html);
}

/**
 * Fetch a URL with cURL and return the response split into body and headers.
 *
 * @param string $url     URL to request (only http and https are allowed).
 * @param string $method  'post' sends $data as the POST body; anything else is a GET.
 * @param string $data    Request body used when $method is 'post'.
 * @param string $referer Value of the Referer header.
 * @param int    $timeout Total timeout in seconds.
 * @param bool   $useck   Send cookies from $ckfile when that file exists.
 * @param bool   $saveck  Store received cookies in $ckfile.
 * @param string $ckfile  Cookie jar path.
 * @return array array(0 => body, 1 => raw header block). On failure the body is
 *               '' and element 1 holds the error message, so callers can always
 *               index the result the same way.
 */
function c_get($url, $method = 'get', $data = '', $referer = _remoteurl_, $timeout = 10, $useck = false, $saveck = false, $ckfile = "ck.txt") {
    // Counts nested manual-redirect calls so a redirect loop cannot recurse forever.
    static $redirectdepth = 0;

    $headerinfo = array(
        "User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
    );
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headerinfo);
    curl_setopt($ch, CURLOPT_TIMEOUT_MS, $timeout * 1000);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_REFERER, $referer);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // Headers are returned in front of the body and split off below.
    curl_setopt($ch, CURLOPT_HEADER, true);
    // NOTE(security): certificate verification is disabled, as in the original
    // script; enable it if the target site has a valid certificate.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    // The URL may come from user input; restrict cURL to web protocols so that
    // file://, gopher:// etc. can never be requested, directly or via redirect.
    if (defined('CURLOPT_PROTOCOLS')) {
        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    }
    if ($saveck == true) {
        curl_setopt($ch, CURLOPT_COOKIEJAR, $ckfile);
    }
    if (file_exists($ckfile) && $useck == true) {
        curl_setopt($ch, CURLOPT_COOKIEFILE, $ckfile);
    }
    if ($method == "post") {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $content = curl_exec($ch);
    if (curl_errno($ch)) {
        $failure = 'Curl error: ' . curl_error($ch);
        curl_close($ch);
        return array('', $failure);
    }
    if ($content === false || $content === '') {
        curl_close($ch);
        return array('', "Get content false!");
    }

    $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $httpcode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    $header = substr($content, 0, $headerSize);
    $body = substr($content, $headerSize);
    if ($body === false) {
        // Older PHP versions return false instead of '' for an empty remainder.
        $body = '';
    }

    // cURL normally follows redirects itself; this branch only runs when the
    // final response is still a 301/302. Follow the Location header manually,
    // but only if one is present and the nesting limit is not exceeded.
    if (($httpcode === 301 || $httpcode === 302) && $redirectdepth < 5
        && preg_match("@location: (.*?)[\n\r;]@i", $header, $tmpgo)
        && trim($tmpgo[1]) !== '') {
        $redirectdepth++;
        $result = c_get(trim($tmpgo[1]));
        $redirectdepth--;
        return $result;
    }

    return array(
        $body,
        $header
    );
}

/**
 * Resolve a link taken from the fetched page into an absolute URL.
 *
 * The base is the page currently being proxied (the base64 value of the
 * "page" query parameter) or, without that parameter, _remoteurl_.
 *
 * @param string $n Link target as found in an href attribute.
 * @return string|false Absolute URL, or false when the target is empty
 *                      (for example a pure "#fragment" link).
 */
function rel2abs($n){
    $base = _remoteurl_;
    if (!empty($_GET['page']) && is_string($_GET['page'])) {
        // $_GET is already URL-decoded by PHP. Decoding it a second time would
        // turn every '+' of the base64 text into a space and corrupt the URL.
        $decoded = base64_decode($_GET['page']);
        if (is_string($decoded) && $decoded !== '') {
            $base = $decoded;
        }
    }

    // Split the base into origin / directory / page so each kind of relative
    // link is resolved against the right part.
    $parts = parse_url($base);
    if (is_array($parts) && isset($parts['scheme'], $parts['host'])) {
        $scheme = $parts['scheme'];
        $origin = $scheme . '://' . $parts['host'] . (isset($parts['port']) ? ':' . $parts['port'] : '');
        $path = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
        $root = $origin . '/';
        // Everything up to and including the last "/" of the path. Unlike
        // pathinfo(), this keeps the final directory of URLs ending in "/".
        $dirbase = $origin . substr($path, 0, strrpos($path, '/') + 1);
        $pagebase = $origin . $path;
    } else {
        // Base could not be parsed: fall back to the configured site root.
        $scheme = 'http';
        $root = $dirbase = $pagebase = _remoteurl_;
    }

    // Fragments are meaningless to the upstream server.
    $hash = strpos($n, "#");
    if ($hash !== false) {
        $n = substr($n, 0, $hash);
    }
    if (empty($n)) {
        return false;
    }

    if (preg_match('@^https?://@i', $n)) {
        return $n;                          // already absolute
    }
    if (substr($n, 0, 2) == "//") {
        return $scheme . ":" . $n;          // protocol-relative: reuse the base scheme
    }
    if ($n[0] == "/") {
        return $root . substr($n, 1);       // root-relative: same origin as the base page
    }
    if ($n[0] == "?") {
        return $pagebase . $n;              // query-only: same page, new query string
    }
    return $dirbase . $n;                   // relative to the base page's directory
}

/**
 * Convert text to the given encoding, detecting the source encoding first.
 *
 * Arrays are converted recursively: every string key and every value is
 * converted, and the result is returned as a new array.
 *
 * @param string|array $data   Text, or (nested) array of text, to convert.
 * @param string       $output Target encoding name.
 * @return string|array Converted data. Values that are neither strings nor
 *                      arrays, and strings whose encoding cannot be detected,
 *                      are returned unchanged.
 */
function _iconv($data, $output = 'utf-8') {
    if (is_array($data)) {
        // Build a fresh array so a converted key replaces the original key
        // instead of being added next to it.
        $converted = array();
        foreach ($data as $key => $val) {
            $newkey = is_string($key) ? _iconv($key, $output) : $key;
            $converted[$newkey] = _iconv($val, $output);
        }
        return $converted;
    }

    if (!is_string($data) || $data === '') {
        return $data;
    }

    $encode_arr = array('UTF-8','ASCII','GBK','GB2312','BIG5','JIS','eucjp-win','sjis-win','EUC-JP');
    $encoded = mb_detect_encoding($data, $encode_arr);
    if ($encoded === false) {
        // No candidate matched: converting from an unknown encoding would
        // only damage the text, so hand it back untouched.
        return $data;
    }
    return mb_convert_encoding($data, $output, $encoded);
}