gilou Modérateur Modzilla | J'ai extrait un peu de code d'un de mes programmes
Code :
- # web management packages
- use LWP::UserAgent;
- use HTTP::Request;
- use HTTP::Response;
- use HTTP::Status;
- use URI::Heuristic;
- ....................................................................................................
- my $connection_address = "http://test.perlscript.org";
- my $connection_agent = "Testing-Perl-Script/v1.0";
- my $http_engine = LWP::UserAgent->new();
- $http_engine->agent($connection_agent);
- $http_engine->cookie_jar({}); #in memory cookie jar
- $http_engine->timeout(50); # As the servers are fast, this should be OK
- ....................................................................................................
- #ici, $file est un fichier ou je vais sauver l'url distante $raw_url
- save_url($file, 0, $raw_url);
- ....................................................................................................
- sub save_url {
- my ($file, $mode, $raw_url) = @_;
- my $data;
- my $result = fetch_url($raw_url, \$data);
- unless ($result) {
- trace_msg ("Error", "Cannot fetch page $raw_url from site: $data" ) and return 0;
- }
- unless ($debug) {
- open my $FILE, '>', $file
- or ( trace_msg("Error", "Can't open '$file' for writing: $OS_ERROR" ) and return 0);
- binmode $FILE if $mode;
- print $FILE $data;
- close $FILE
- or ( trace_msg("Error", "Can't close '$file' after writing: $OS_ERROR" ) and return 0);
- }
- return 1;
- }
- ....................................................................................................
- sub fetch_url {
- my ($raw_url, $data) = @_;
- my $url = URI::Heuristic::uf_urlstr($raw_url);
- $| = 1; # to flush next line
- my $request = new HTTP::Request(GET => $url);
- $request->referer($connection_address);
- if ($debug) {
- trace_msg("Debug", "Fetching page $raw_url" );
- $$data = "<debug></debug>";
- return 1;
- }
- else {
- trace_msg("User+", "Fetching page $raw_url..." );
- my $response = $http_engine->request($request);
- trace_msg("+User", "...Done\n" );
- if ($response->code != RC_OK) {
- if ($response->code == RC_REQUEST_TIMEOUT) {
- # Try again a second time if we get a time out
- my $req = HTTP::Request->new(GET => $url);
- $req->referer($connection_address);
- trace_msg("User+", "Retrying to fetch page $raw_url..." );
- my $response = $http_engine->request($req);
- trace_msg("+User", "...Done\n" );
- if ($response->code != RC_OK)
- {
- $$data = $response->status_line;
- return 0;
- }
- else {
- $$data = $response->content;
- return 1;
- }
- }
- else {
- $$data = $response->status_line;
- return 0;
- }
- }
- else {
- $$data = $response->content;
- return 1;
- }
- }
- }
|
Ca devrait te donner des billes pour ce que tu fais.
Dans fetch_url, je reessaye au moins une fois en cas d'echec, car par experience, on a parfois des echecs au chargement, mais rarement deux echecs successifs (sauf reel probleme).
dans save_url($file, 0, $raw_url); le second parametre est a garder a 0 si tu récupere du html (mode texte), et a mettre a 1 si tu récuperes des images par exemple (mode binaire) [sous linux, le mode 0 devrait passer partout a priori, mais pas sous windows]
Tu peux virer les appels à trace_message de mon exemple. Le code de cette procédure était:
Code :
- sub trace_msg {
- my ($level, $message) = @_;
- my $prefix = 1;
- my $suffix = 1;
- if ($trace) {
- if ($level =~ /^\+/o) {
- $prefix = 0;
- }
- if ($level =~ /\+$/o) {
- $suffix = 0;
- }
- if ($level =~ /Error/io) {
- $message = "Error:".$message if ($prefix);
- $message .= "\n" if ($suffix);
- print $message;
- }
- elsif ($level =~ /User/io) {
- $message .= "\n" if ($suffix);
- print $message;
- }
- elsif ($level =~ /Warning/io and $trace < 3) {
- $message = "Warning:".$message if ($prefix);
- $message .= "\n" if ($suffix);
- print $message;
- }
- elsif ($level =~ /Info/io and $trace < 2) {
- $message = "Info:".$message if ($prefix);
- $message .= "\n" if ($suffix);
- print $message;
- }
- elsif ($level =~ /Debug/io and $debug) {
- $message = "Debug:".$message if ($prefix);
- $message .= "\n" if ($suffix);
- print $message;
- }
- }
- return 1; # for trace_msg(...) and ...
- }
|
A+, Message édité par gilou le 28-09-2008 à 12:12:41 ---------------
There's more than what can be linked! -- Iyashikei Anime Forever! -- AngularJS c'est un framework d'engulé! --
|