user-pic

[求助]能不能帮我修改一下代码--{急}

Vote 0 Votes

下载了一个CGI程序,是将网页从将简体转繁体的程序


http://www.51blue.com/g2b.cgi?w=1&url=http://www.51blue.com

http://www.51blue.com/g2b.cgi?w=1&url=http://www.51blue.com/list.asp?id=206

格式可以正常显示

但是地址如果为
http://www.51blue.com/g2b.cgi?w=1&url=http://www.51blue.com/class.asp?lx=small&anid=20&nid=218

程序就会把地址过滤为

http://www.51blue.com/class.asp?lx=small

我找了半天也不知道程序是在哪里做的过滤,请大家帮我一下

-------------------代码如下-------------------------------------------------
#!/usr/bin/perl

####################################################
#作者: 飞云小侠 scud@126.com
#       网站: http://www.coolwww.net
#
#简体繁体网页互相翻译(BIG5 to GB)
#      
#
#日期:1999年11月
####################################################
#
#       您可以修改代码或者版面
#但必须保留上述版权声明
#       未经许可不得用于商业用途
#    
####################################################
#
#文件:b2g.cgi g2b.cgi,
#模板:g2b.hhh b2g.hhh
#库  :codelib.pl cgi-lib.pl
#

#use lib "/home/coolwww/mylib";

# 必须有libwww模块的支持,请咨询你的管理员
use LWP;

require "cgi-lib.pl"; # 常规模块
require "codelib2.pl"; # 翻译模块

#此程序的网址
$tfiledir ="./";
@alltfile = ("g2b.hhh","b2g.hhh");

$prgurl ="http://www.coolwww.net/cgi-bin/bg/";
#$prgurl ="http://zyf/cgi-bin/";
@allprgurl = ("g2b.cgi","b2g.cgi");
@allprgname = ("Gb->Big5呼陆亩","Big5->Gb网页翻译");
@allcode = qw(big5 gb hz utf8 ascii other);
#当前程序类型
$prgtype=0;  # 0 :g2b 1:b2g
#广告开放
$advopen =0;

$gbfile = "/home1/scud/cooldata/gblog.txt";
#读取网址
&ReadParse;

$curprg=$allprgurl[$prgtype];
$thisprg=$prgurl.$curprg;
$prgname=$allprgname[$prgtype];

if ($in{'url'})
{
print "Content-type: text/html\n\n";
$yoururl = $in{'url'};
     if (length($yoururl) > 500 )
{
&yourerr("您输入的网址太长了! (Your web address is too long!)");
}
if (!($yoururl   =~ /^http:\/\/.*\..*/i))
{
&yourerr("你输入了一个错误的网页地址,格式为 http://yourname.net (You input a error web address ,the format is http://yourname.net)");
}

#记录访问的网址,如果需要记录,请去掉注释,修改路径,并定期清理此log
#&ip;

if ( $yoururl =~ /$curprg/i )
{
&yourerr("抱歉,为了避免循环嵌套,目标网址不能包含\"$curprg\"!!!或者本页已经是相应的编码了.(Sorry,the Address can't include my self!!! or the page is in the same encode)");
}

my $ua = new LWP::UserAgent;

$ua->agent('libwww-perl/$LWP::VERSION');
$ua->timeout(600);
#$ua->proxy(['http', 'ftp'], 'http://proxy.so.unis:8080/');

#获取网页内容
my $req = new HTTP::Request(GET => $yoururl);
#$req->header("Accept" => "text/html");
$req->header("user_agent" => "libwww-perl/$LWP::VERSION");
$req->header("Referer" => "http://www.coolwww.net/");
$req->header("server" => "coolwww.net");
$req->header("from" => "coolwww\@coolwww.net");

my $res = $ua->request($req);

#my $res = $ua->request(HTTP::Request->new(GET => $yoururl));

 if (!($res->is_success) )
 {
 $myerrmsg ="错误的网址或者操作超时:$yoururl<br>Error Message :".$res->message();
 
 #$myerrmsg ="错误的网址或者操作超时(error address or timeout):$yoururl";
&yourerr($myerrmsg);
}

if (!($res->content_type eq 'text/html') )
{
&yourerr("此目标网址不是html文件(This Address is not a html file): $yoururl");
}

my $tbase = $res->base;

$durlcontent =$res->content;

#如果没有基准网址,加上
$linkbase ="<head>\n<base href=$tbase>";
$oldhead ="<head>";
if (!($durlcontent =~ /<base\s+[^><]*href=/))
{
$durlcontent =~s/$oldhead/$linkbase/i;
}

#替换编码,如果已经是目标编码了,就不要替换了
@thiscode=&codeguess($durlcontent);
$curcode =$thiscode[0];

$okurlcontent = $durlcontent;
if ($prgtype == 0 )
{
&changecode('gb2312','big5');
if ($curcode ne $allcode[$prgtype] )
{$okurlcontent= &gb2big5($durlcontent);  }
}
elsif ( ($prgtype == 1 ) )
{
&changecode('big5','gb2312');
if ($curcode ne $allcode[$prgtype] )
{$okurlcontent= &big52gb($durlcontent); }
}

#广告事业
&myadv;

#进行链接替换
if ($in{'w'}==1)
{
#全替换:全换成使用本翻译程序的链接 http://www.coolwww.net/cgi-bin/b2g.cgi?w=1&url=http://xxx.xxx.com/xxx.htm
$curroot = &getmyroot($tbase);
$curbase = &getmybase($tbase);
#如果是javascript等的链接如何处理?
#1是绝对链接,加上程序名即可
#2是根链接,加上程序名和根路径
#3是相对链接,加上程序名和基本路径
#4是其他链接,例如javascript,不作处理
$myadd ="$thisprg?w=1&url=";
$okurlcontent =~ s#(<a\s+[^><]*href=)("?)(http://[^"><:]+)\2([^><]*>)#\1\2$myadd\3\2\4#gi;
$okurlcontent =~ s#(<a\s+[^><]*href=)("?)(/[^"><:]+)\2([^><]*>)#\1\2$myadd$curroot\3\2\4#gi;
#
$okurlcontent =~ s#(<a\s+[^><]*href=)("?)([^"><:]+)\2(>)#\1\2$myadd$curbase\3\2\4#gi;
$okurlcontent =~ s#(<a\s+[^><]*href=)("?)([^"><:]+)\2(\s+[^><]*>)#\1\2$myadd$curbase\3\2\4#gi;
}

print $okurlcontent;

exit;
}
else
{
print "Content-type: text/html\n\n";

@tfile=&readtfile("$tfiledir$alltfile[$prgtype]");
     foreach(@tfile)
     {
       $_ =~ s/\$thisprg/$thisprg/g;
       $_ =~ s/\$prgname/$prgname/g;
       $_ =~ s/\$imgurl/$imgurl/g;
   print "$_";
     }

exit;        

}

1;

#做点广告
sub myadv
{
if ($advopen == 1)  
{
$tsite=$yoururl;
if ( $tsite =~ /.*coolwww\.net.*/i )  { }
else{
$advcode = qq~
<div align=center><center><table width=60%>
<tr><td height=10></td></tr>
<tr><td width=100% align=center><small>$prgname: <a href=http://www.coolwww.net target=_blank>coolwww.net</a></small></td></tr>
</table></center></div></body>
~;
$oldendbody = "</body>";
$okurlcontent =~s/$oldendbody/$advcode/i;
}
}


}

#写入文本

sub ip {

# get old ip data

open(FILE, "$gbfile");

@ipinfo = <FILE>;
close(FILE);
$numip = @ipinfo;
($newip) = "$curprg$yoururl";
$ipfound = 0;
$total = 0;

# add 1 to current ip

for ($a = 0; $a < $numip; $a++) {

($ipcount[$a],$ipaddr[$a], $nochop) = split(/'/,$ipinfo[$a]);

if ($ipaddr[$a] eq $newip) {
   $ipcount[$a] ++;
   $ipfound = 1;
}

$total = $total + $ipcount[$a];

}

# if this ip is new, create a new piece of data for it

if ($ipfound == 0) {
$ipaddr[$numip] = $newip;
$ipcount[$numip] = 1;
$numip++;
$total++;
}

# the famous "Gugliotta Bubliotta" sort

for ($a = 0; $a < $numip; $a++) {
for ($b = ($numip - 1); $b > $a; $b--) {

   if ($ipcount[$b] > $ipcount[$b-1]) {
      $temp1 = $ipaddr[$b-1];
      $ipaddr[$b-1] = $ipaddr[$b];
      $ipaddr[$b] = $temp1;
      $temp2 = $ipcount[$b-1];
      $ipcount[$b-1] = $ipcount[$b];
      $ipcount[$b] = $temp2;
   }

}
}

# write ip data

open(FILE, ">$gbfile");

for ($a = 0; $a <= $numip; $a++) {

if ($ipcount[$a] > 0) {  
print FILE "$ipcount[$a]'$ipaddr[$a]'0<br>\n";
}

}

close(FILE);

} # end sub ip



#读取模板文件
sub readtfile
{
open(READTFILE,"$_[0]");
@readtfile=<READTFILE>;
close(READTFILE);
return @readtfile;
}


#替换编码语句:有指定编码的语句才替换.
sub changecode
{
local($oldcode,$newcode)=@_;
$newstr ="<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=$newcode\">";
$oldhead ="<head>";
$olddurlcontent =$durlcontent;
$durlcontent =~ s#<meta\s+http-equiv=("?)Content-Type\1\s+content=("?)text/html;\s+charset=[^><"]*$oldcode[^><"]*(\2)\s*>##i;
$durlcontent =~ s#<meta\s+content=("?)text/html;\s+charset=[^><]*$oldcode[^><"]*(\1)\s+http-equiv=("?)Content-Type(\2)\s*>##i;
if ($olddurlcontent ne $durlcontent)
{
$durlcontent =~ s/$oldhead/$newstr/i;
}
}


sub yourerr {
print <<YourErr;
<HTML>
<HEAD><title>发生错误Error</title></head>
<BODY>
<ul>
<table border=0>
<tr><td><font color="#ff0000"><b>发生错误(Error)</b><font></td></tr>
<tr><td><b>$_[0]</b><br><br>
<A HREF="javascript:history.go(-1);">返回上一页(Back)</A>
</td></tr></table>
<BR><BR><BR>
</body></html>
YourErr
exit;
}

#得到当前根路径
sub getmyroot
{
local($curroot)=@_;
$curroot = $curroot."/";
$curroot =~ s/(^http:\/\/[^\/]*)\/.*/\1/i;
$curroot;
}

#得到当前相对路径
sub getmybase
{
local($baseurl)=@_;
$baseurl =~ s#(^http://.*)/[^\.]*\.[^/]*#\1#i;
$baseurl = $baseurl."/";
$baseurl =~ s#//$#/#;
$baseurl;

}








谢谢各位

11 Replies

| Add a Reply


  • 不是过滤
    是根本没接收

    看错,接收呢,用in函数

  • 代码最好用

    [ /code] 括起来。不然看的好累。。

    原因很简单。程序太好了。。。只处理最后一个 ? 后面的地址。。

    而这段代码是出在函数 ReadParse 中的。。而你并没有提供这个函数。。也就是这个文件没有办法修改,因为原因不在这个文件中。

    或者。。你可以这样子改?
    [code]&ReadParse;

    改成

    use CGI;
    my $q=new CGI;
    $in_url = $q->param('url');

    然后。。把所有的
    $in{'url'} 替换成 $in_url。。

    再寒下。。这个程序实在太老了。。。。。。。:(

  • 这个程式好像是咱们PERLCHINA现任老大
    风云howwa写的。。。。。。。

  • 下面引用由曾子程在 2005/3/17 08:36pm 发表的内容:
    这个程式好像是咱们PERLCHINA现任老大
    风云howwa写的。。。。。。。

    好象是他。呵呵。 :em05:
  • 不好意思,最近一直上不来,小区的网络有问题

    我把程序打包上来吧,实在很难看懂,不过这样的程序很实用.希望能多学习了.

    http://easun.org/upload/early/0415_1111835808.zip


  • sub ReadParse {
     local (*in) = shift if @_;    # CGI input
     local (*incfn,                # Client's filename (may not be provided)
    *inct,                 # Client's content-type (may not be provided)
    *insfn) = @_;          # Server's filename (for spooled files)
     local ($len, $type, $meth, $errflag, $cmdflag, $perlwarn, $got);

     # Disable warnings as this code deliberately uses local and environment
     # variables which are preset to undef (i.e., not explicitly initialized)
     $perlwarn = $^W;
     $^W = 0;

     binmode(STDIN);   # we need these for DOS-based systems
     binmode(STDOUT);  # and they shouldn't hurt anything else
     binmode(STDERR);

     # Get several useful env variables
     $type = $ENV{'CONTENT_TYPE'};
     $len  = $ENV{'CONTENT_LENGTH'};
     $meth = $ENV{'REQUEST_METHOD'};
     
     if ($len > $cgi_lib'maxdata) { #'
         &CgiDie("cgi-lib.pl: Request to receive too much data: $len bytes\n");
     }
     
     if (!defined $meth || $meth eq '' || $meth eq 'GET' ||
         $type eq 'application/x-www-form-urlencoded') {
       local ($key, $val, $i);

       # Read in text
       if (!defined $meth || $meth eq '') {
         $in = $ENV{'QUERY_STRING'};
         $cmdflag = 1;  # also use command-line options
       } elsif($meth eq 'GET' || $meth eq 'HEAD') {
         $in = $ENV{'QUERY_STRING'};
       } elsif ($meth eq 'POST') {
           if (($got = read(STDIN, $in, $len) != $len))
     {$errflag="Short Read: wanted $len, got $got\n"};
       } else {
         &CgiDie("cgi-lib.pl: Unknown request method: $meth\n");
       }

       @in = split(/[&;]/,$in);
       push(@in, @ARGV) if $cmdflag; # add command-line parameters

       foreach $i (0 .. $#in) {
         # Convert plus to space
         $in[$i] =~ s/\+/ /g;

         # Split into key and value.  
         ($key, $val) = split(/=/,$in[$i],2); # splits on the first =.

         # Convert %XX from hex numbers to alphanumeric
         $key =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge;
         $val =~ s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge;

         # Associate key and value
         $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
         $in{$key} .= $val;
       }

     } elsif ($ENV{'CONTENT_TYPE'} =~ m#^multipart/form-data#) {
       # for efficiency, compile multipart code only if needed
    $errflag = !(eval <<'END_MULTIPART');

       local ($buf, $boundary, $head, @heads, $cd, $ct, $fname, $ctype, $blen);
       local ($bpos, $lpos, $left, $amt, $fn, $ser);
       local ($bufsize, $maxbound, $writefiles) =
         ($cgi_lib'bufsize, $cgi_lib'maxbound, $cgi_lib'writefiles);


       # The following lines exist solely to eliminate spurious warning messages
       $buf = '';

       ($boundary) = $type =~ /boundary="([^"]+)"/; #";   # find boundary
       ($boundary) = $type =~ /boundary=(\S+)/ unless $boundary;
       &CgiDie ("Boundary not provided: probably a bug in your server")
         unless $boundary;
       $boundary =  "--" . $boundary;
       $blen = length ($boundary);

       if ($ENV{'REQUEST_METHOD'} ne 'POST') {
         &CgiDie("Invalid request method for  multipart/form-data: $meth\n");
       }

       if ($writefiles) {
         local($me);
         stat ($writefiles);
         $writefiles = "/tmp" unless  -d _ && -r _ && -w _;
         # ($me) = $0 =~ m#([^/]*)$#;
         $writefiles .= "/$cgi_lib'filepre";
       }

       # read in the data and split into parts:
       # put headers in @in and data in %in
       # General algorithm:
       #   There are two dividers: the border and the '\r\n\r\n' between
       # header and body.  Iterate between searching for these
       #   Retain a buffer of size(bufsize+maxbound); the latter part is
       # to ensure that dividers don't get lost by wrapping between two bufs
       #   Look for a divider in the current batch.  If not found, then
       # save all of bufsize, move the maxbound extra buffer to the front of
       # the buffer, and read in a new bufsize bytes.  If a divider is found,
       # save everything up to the divider.  Then empty the buffer of everything
       # up to the end of the divider.  Refill buffer to bufsize+maxbound
       #   Note slightly odd organization.  Code before BODY: really goes with
       # code following HEAD:, but is put first to 'pre-fill' buffers.  BODY:
       # is placed before HEAD: because we first need to discard any 'preface,'
       # which would be analagous to a body without a preceeding head.

       $left = $len;
      PART: # find each part of the multi-part while reading data
       while (1) {
         die $@ if $errflag;

         $amt = ($left > $bufsize+$maxbound-length($buf)
         ?  $bufsize+$maxbound-length($buf): $left);
         $errflag = (($got = read(STDIN, $buf, $amt, length($buf))) != $amt);
         die "Short Read: wanted $amt, got $got\n" if $errflag;
         $left -= $amt;

         $in{$name} .= "\0" if defined $in{$name};
         $in{$name} .= $fn if $fn;

         $name=~/([-\w]+)/;  # This allows $insfn{$name} to be untainted
         if (defined $1) {
           $insfn{$1} .= "\0" if defined $insfn{$1};
           $insfn{$1} .= $fn if $fn;
         }

        BODY:
         while (($bpos = index($buf, $boundary)) == -1) {
           die $@ if $errflag;
           if ($name) {  # if no $name, then it's the prologue -- discard
             if ($fn) { print FILE substr($buf, 0, $bufsize); }
             else     { $in{$name} .= substr($buf, 0, $bufsize); }
           }
           $buf = substr($buf, $bufsize);
           $amt = ($left > $bufsize ? $bufsize : $left); #$maxbound==length($buf);
           $errflag = (($got = read(STDIN, $buf, $amt, $maxbound)) != $amt);  
    die "Short Read: wanted $amt, got $got\n" if $errflag;
           $left -= $amt;
         }
         if (defined $name) {  # if no $name, then it's the prologue -- discard
           if ($fn) { print FILE substr($buf, 0, $bpos-2); }
           else     { $in {$name} .= substr($buf, 0, $bpos-2); } # kill last \r\n
         }
         close (FILE);
         last PART if substr($buf, $bpos + $blen, 4) eq "--\r\n";
         substr($buf, 0, $bpos+$blen+2) = '';
         $amt = ($left > $bufsize+$maxbound-length($buf)
         ? $bufsize+$maxbound-length($buf) : $left);
         $errflag = (($got = read(STDIN, $buf, $amt, length($buf))) != $amt);
         die "Short Read: wanted $amt, got $got\n" if $errflag;
         $left -= $amt;


         undef $head;  undef $fn;
        HEAD:
         while (($lpos = index($buf, "\r\n\r\n")) == -1) {
           die $@ if $errflag;
           $head .= substr($buf, 0, $bufsize);
           $buf = substr($buf, $bufsize);
           $amt = ($left > $bufsize ? $bufsize : $left); #$maxbound==length($buf);
           $errflag = (($got = read(STDIN, $buf, $amt, $maxbound)) != $amt);  
           die "Short Read: wanted $amt, got $got\n" if $errflag;
           $left -= $amt;
         }
         $head .= substr($buf, 0, $lpos+2);
         push (@in, $head);
         @heads = split("\r\n", $head);
         ($cd) = grep (/^\s*Content-Disposition:/i, @heads);
         ($ct) = grep (/^\s*Content-Type:/i, @heads);

         ($name) = $cd =~ /\bname="([^"]+)"/i; #";
         ($name) = $cd =~ /\bname=([^\s:;]+)/i unless defined $name;  

         ($fname) = $cd =~ /\bfilename="([^"]*)"/i; #"; # filename can be null-str
         ($fname) = $cd =~ /\bfilename=([^\s:;]+)/i unless defined $fname;
         $incfn{$name} .= (defined $in{$name} ? "\0" : "") . $fname;

         ($ctype) = $ct =~ /^\s*Content-type:\s*"([^"]+)"/i;  #";
         ($ctype) = $ct =~ /^\s*Content-Type:\s*([^\s:;]+)/i unless defined $ctype;
         $inct{$name} .= (defined $in{$name} ? "\0" : "") . $ctype;

         if ($writefiles && defined $fname) {
           $ser++;
    $fn = $writefiles . ".$$.$ser";
    open (FILE, ">$fn") || &CgiDie("Couldn't open $fn\n");
           binmode (FILE);  # write files accurately
         }
         substr($buf, 0, $lpos+4) = '';
         undef $fname;
         undef $ctype;
       }

    1;
    END_MULTIPART
       if ($errflag) {
         local ($errmsg, $value);
         $errmsg = $@ || $errflag;
         foreach $value (values %insfn) {
           unlink(split("\0",$value));
         }
         &CgiDie($errmsg);
       } else {
         # everything's ok.
       }
     } else {
       &CgiDie("cgi-lib.pl: Unknown Content-type: $ENV{'CONTENT_TYPE'}\n");
     }

     # no-ops to avoid warnings
     $insfn = $insfn;
     $incfn = $incfn;
     $inct  = $inct;

     $^W = $perlwarn;

     return ($errflag ? undef :  scalar(@in));
    }


    好长哦~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  • 光是解析个数据就写这么多
    真是强啊~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    我已经没有耐心看下去了

  • 汗。。太长了。我建议你还是找原作者"飞云"帮忙吧。。
    在这里 http://bbs.perlchina.org/pl/ccb/index.cgi  可以找到他,他的ID是  hoowa ,那里的老大:)

  • 下面引用由曾子程在 2005/3/27 00:32am 发表的内容:
    光是解析个数据就写这么多
    真是强啊~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    我已经没有耐心看下去了

    这个lib是官方写的。。早于CGI.pm的版本:)
  • 知道啊 因为我们LB之前也是用过这个库的
    为什么木头总喜欢说一些大家都知道的事情?

Add a Reply

Forum Groups

Good Perl Books

Perl 學習手札

作者:簡信昌

Perl 學習手札 , 一本优秀的中文Perl在线学习资料。

Perl 程序讨论区

12 50

Last Topic: [注意]本区文章归档区 by 路杨 on Oct 16, 2006

事物处理及其他

113 1410

Last Topic: 踩个脚印 by 眼睛oo on Dec 4, 2010

测试私密区

测试私密区

18 88

Notice: 用于测试程序或者不可告人之秘密。不要问我要密码哦。。[此为原LB_ES论坛系统保留项,只限原有权限朋友进入,不再接受注册]

OpenID accepted here Learn more about OpenID