From f71abf8d535c494320afd24a157db5de9079970d Mon Sep 17 00:00:00 2001 From: IKEDA Soji Date: Wed, 11 Mar 2020 17:19:11 +0900 Subject: [PATCH] - WWSympa: Since CGI of some HTTP servers might split script-path and extra-path of script-URI improperly, we'd be better to reconstruct them: SCRIPT_NAME and PATH_INFO. Note that we shouldn't use non-standard CGI environment variables such as REQUEST_URI. - Additional environment variable SYMPA_DOMAIN stands for available mail domain (a.k.a. "robot"). - If no robot providing web service was found according to client's request, error response will be returned. --- src/cgi/wwsympa.fcgi.in | 15 ++--- src/lib/Makefile.am | 1 + src/lib/Sympa/WWW/FastCGI.pm | 83 +++++++++++++++++++++++++ src/lib/Sympa/WWW/Tools.pm | 116 ++++++++++++++++++++--------------- 4 files changed, 155 insertions(+), 60 deletions(-) create mode 100644 src/lib/Sympa/WWW/FastCGI.pm diff --git a/src/cgi/wwsympa.fcgi.in b/src/cgi/wwsympa.fcgi.in index 2eb8aecb0..7689121ed 100644 --- a/src/cgi/wwsympa.fcgi.in +++ b/src/cgi/wwsympa.fcgi.in @@ -38,7 +38,6 @@ use strict; use lib split(/:/, $ENV{SYMPALIB} || ''), '--modulesdir--'; use Archive::Zip qw(); -use CGI::Fast qw(); use DateTime; use DateTime::Format::Mail; use Digest::MD5; @@ -92,6 +91,7 @@ use Sympa::Tools::Text; use Sympa::Tracking; use Sympa::User; use Sympa::WWW::Auth; +use Sympa::WWW::FastCGI; use Sympa::WWW::Marc::Search; use Sympa::WWW::Report; use Sympa::WWW::Session; @@ -1050,7 +1050,7 @@ $log->syslog('info', 'WWSympa started, process %d', $PID); # Main loop. my $loop_count = 0; my $start_time = time; -while ($query = CGI::Fast->new) { +while ($query = Sympa::WWW::FastCGI->new) { $loop_count++; undef $param; @@ -1111,13 +1111,10 @@ while ($query = CGI::Fast->new) { %in = $query->Vars; # Determin robot. - # N.B. As of 6.2.15, the http_host parameter will match with the host name - # and path locally detected by server. 
If remotely detected host name - # and / or path should be differ, the proxy must adjust them. - # N.B. As of 6.2.34, wwsympa_url parameter may be optional. - $robot = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url'); - unless (Conf::get_robot_conf($robot, 'wwsympa_url')) { - print "Status: 404 Not Found\n"; + $robot = $ENV{SYMPA_DOMAIN}; + unless ($robot) { + # No robot providing web service found. + print "Status: 421 Misdirected Request\n"; print "\n\n"; next; } diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index d4a070347..9c547c48c 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -188,6 +188,7 @@ nobase_modules_DATA = \ Sympa/Upgrade.pm \ Sympa/User.pm \ Sympa/WWW/Auth.pm \ + Sympa/WWW/FastCGI.pm \ Sympa/WWW/Marc.pm \ Sympa/WWW/Marc/Search.pm \ Sympa/WWW/Report.pm \ diff --git a/src/lib/Sympa/WWW/FastCGI.pm b/src/lib/Sympa/WWW/FastCGI.pm new file mode 100644 index 000000000..a55e371af --- /dev/null +++ b/src/lib/Sympa/WWW/FastCGI.pm @@ -0,0 +1,83 @@ +# -*- indent-tabs-mode: nil; -*- +# vim:ft=perl:et:sw=4 + +# Sympa - SYsteme de Multi-Postage Automatique +# +# Copyright 2020 The Sympa Community. See the AUTHORS.md +# file at the top-level directory of this distribution and at +# <https://github.com/sympa-community/sympa.git>. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +package Sympa::WWW::FastCGI; + +use strict; +use warnings; + +use base qw(CGI::Fast); + +use Sympa::WWW::Tools; + +sub new { + my $class = shift; + my @args = @_; + + my $self = $class->SUPER::new(@args); + + # Determin mail domain (a.k.a. "robot") the request is dispatched. + # N.B. As of 6.2.15, the http_host parameter will match with the host name + # and path locally detected by server. If remotely detected host name + # and / or path should be differ, the proxy must adjust them. + # N.B. As of 6.2.34, wwsympa_url parameter may be optional. + my @vars = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url'); + if (@vars) { + @ENV{qw(SYMPA_DOMAIN SCRIPT_NAME PATH_INFO)} = @vars; + } else { + delete $ENV{SYMPA_DOMAIN}; + } + + $self; +} + +1; + +__END__ + +=encoding utf-8 + +=head1 NAME + +Sympa::WWW::FastCGI - CGI Interface for FastCGI of Sympa + +=head1 SYNOPSIS + + TBD. + +=head1 DESCRIPTION + +TBD. + +=head1 SEE ALSO + +L<CGI::Fast>. + +RFC 3875, The Common Gateway Interface (CGI) Version 1.1. +L<https://tools.ietf.org/html/rfc3875>. + +=head1 HISTORY + +L<Sympa::WWW::FastCGI> appeared on Sympa 6.2.55b. + +=cut + diff --git a/src/lib/Sympa/WWW/Tools.pm b/src/lib/Sympa/WWW/Tools.pm index 62eb7a1c6..4fadc71c6 100644 --- a/src/lib/Sympa/WWW/Tools.pm +++ b/src/lib/Sympa/WWW/Tools.pm @@ -33,6 +33,7 @@ use Digest::MD5; use English qw(-no_match_vars); use File::Path qw(); use URI; +use URI::Escape qw(); use Sympa; use Conf; @@ -227,65 +228,78 @@ sub get_my_url { sub get_robot { my @keys = @_; - my $request_host = _get_server_name(); - my $request_path = $ENV{'REQUEST_URI'} || ''; - my $robot_id; - - if (defined $request_host and length $request_host) { - my $selected_path = ''; - foreach my $rid (Sympa::List::get_robots()) { - my $local_url; - foreach my $key (@keys) { - $local_url = Conf::get_robot_conf($rid, $key); - last if $local_url; - } - next unless $local_url; - - if ($local_url =~ m{\A[-+\w]+:}) { - ; - } elsif ($local_url =~ m{\A//}) { - $local_url = 'http:' . $local_url; - } else { - $local_url = 'http://' . 
$local_url; - } + # Get host part of script-URI from standard CGI environment variable + # SERVER_NAME. + # NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field + # is _no longer_ referred and this function returns only locally detected + # server name. + my $request_host = lc($ENV{SERVER_NAME} // ''); + return unless length $request_host; + my $ipv6_re = Sympa::Regexps::ipv6(); + if ($request_host =~ /\A$ipv6_re\z/) { # IPv6 address + $request_host = sprintf '[%s]', $request_host; + } + + # Since CGI of some HTTP servers might split script-path and extra-path of + # script-URI inproperly, we'd be better to reconstruct them from these + # standard CGI environment variables: + # - SCRIPT_NAME: a URI path which could identify the CGI script. + # - PATH_INFO: derived from the portion of the URI path hierarchy + # following the part that identifies the script itself. + # Note that they are not URL-encoded, unlike non-standard REQUEST_URI. + my $org_script_name = $ENV{SCRIPT_NAME} // ''; + my $org_path_info = $ENV{PATH_INFO} // ''; + return unless '' eq $org_script_name or 0 == index $org_script_name, '/'; + return unless '' eq $org_path_info or 0 == index $org_path_info, '/'; + my $request_path = $org_script_name . $org_path_info; + + # Find mail domain (a.k.a. "robot") of which web URL matches script-URI. + my ($robot_id, $script_path) = (undef, ''); + foreach my $rid (Sympa::List::get_robots()) { + my $local_url; + foreach my $key (@keys) { + $local_url = Conf::get_robot_conf($rid, $key); + last if $local_url; + } + next unless $local_url; - my $uri = URI->new($local_url); - next - unless $uri - and $uri->scheme - and grep { $uri->scheme eq $_ } qw(http https); - - my $host = lc($uri->host || ''); - my $path = $uri->path || '/'; - #FIXME:might need percent-decode hosts and/or paths - next - unless $request_host eq $host - and 0 == index $request_path, $path; - - # The longest path wins. 
- ($robot_id, $selected_path) = ($rid, $path) - if length $selected_path < length $path; + if ($local_url =~ m{\A[-+\w]+:}) { + ; + } elsif ($local_url =~ m{\A//}) { + $local_url = 'http:' . $local_url; + } else { + $local_url = 'http://' . $local_url; } - } - return (defined $robot_id) ? $robot_id : $Conf::Conf{'domain'}; -} + my $uri = URI->new($local_url); + next + unless $uri + and $uri->scheme + and grep { $uri->scheme eq $_ } qw(http https); -# Old name: (part of) get_header_field() in wwsympa.fcgi. -# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field is -# _no longer_ referred and this function returns only locally detected server -# name. -sub _get_server_name { - my $server = $ENV{SERVER_NAME}; - return undef unless defined $server and length $server; + my $host = lc URI::Escape::uri_unescape($uri->host // ''); + my $path = URI::Escape::uri_unescape($uri->path // '/'); + next unless $request_host eq $host; + next + unless $request_path eq $path + or 0 == index($request_path, $path . '/'); - my $ipv6_re = Sympa::Regexps::ipv6(); - if ($server =~ /\A$ipv6_re\z/) { # IPv6 address - $server = "[$server]"; + # The longest path wins. + ($robot_id, $script_path) = ($rid, $path) + if length $script_path < length $path; } - return lc $server; + + return unless $robot_id; + return + wantarray + ? ($robot_id, $script_path, substr $request_path, length $script_path) + : $robot_id; } +# Old name: (part of) get_header_field() in wwsympa.fcgi. +# No longer used. +#sub _get_server_name; + # Old name: (part of) get_header_field() in wwsympa.fcgi. # NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Host:" request field is # _no longer_ referred and this function returns only locally detected host