#!/usr/bin/perl -T -w # # whois_nameserver_users # # AUTHOR: # Dan Harkless # # COPYRIGHT: # This file is Copyright (C) 2003 by Dan Harkless, and is released under the # GNU General Public License . # # USAGE: # % whois_nameserver_users # # DESCRIPTION: # NOTE: Network Solutions apparently disabled their nonstandard 'host' WHOIS # command in May 2002, so this script no longer reports useful data. # # Takes one parameter, a domain name to look up. In what way do we look it # up, exactly? We first do a whois query to whois.internic.net. This tells # us whether or not the domain is registered through Network Solutions, Inc. # If it is, we're in luck, because NSI (and no one else??) has implemented # nonstandard WHOIS commands that allow us to retrieve a list of domains that # use a particular nameserver. Why would we want this info? Well, maybe # we're an ISP that wants to woo away another ISP's customers. Nefarious? # Well, if that ISP does a good enough job, their customers won't _want_ to # leave. # # Unfortunately, as of this writing, Network Solutions only returns up to 50 # domains per nameserver. Originally there was no limit, and then several # years ago the limit went down to 250, and now it's 50. Luckily, the # information returned for primary vs. secondary (and tertiary...) nameservers # is often slightly different, so we can push a bit above the 50 limitation. # # For each domain name we find, we traceroute to www.. This is # partially to kill time in order to not go over the query limit on NSI's # whois server, and it's partially to see if this company has their connection # through the ISP we're checking out, and whether they have a website online. # # We also do an nslookup for MX records to see if the domain seems to have a # dedicated mailserver. # # Note that this script can be the target of an ISINDEX (and thus will also # work on the commandline), a GET FORM, or a POST FORM. If called with no # parameters, we generate our own query form. # # DATE MODIFICATION # ========== ================================================================== # 2003-10-03 Hadn't used this since 2000 or 2001 (the 3-15 update was a generic # one done to multiple scripts). Found that the jwhois client (in # contrast to the AIX and Solaris whois clients used during original # development) needed an extra -n parameter -- this is now added if # jwhois is detected. Also updated ${FIRST,LAST}_LINE_OF_NSI_- # DISCLAIMER_REGEXP to current values. However, NSI has apparently # disabled their nonstandard 'host' WHOIS command, so unless they # re-enable that, this script will no longer report useful data. # 2003-03-15 This script is really intended for intranet rather than public # website use, but just in case, use CGI.pm's DISABLE_UPLOADS and # POST_MAX to protect against DoS attacks. POSTs >1 KiB will error. # 2000-06-07 $CHILD_ERROR is packed exit code. Lose the signal/core dump part. # 2000-06-03 Added option to email results when complete due to stupid browsers # timing out before we're done spitting out data. # 2000-06-02 Allow choice of HTML or plain text output. # 2000-06-02 Allow setting of network command timeout via the query form. # 2000-06-02 Generate our own query form if called with no parameters. # 2000-06-02 traceroutes can take so long browser times out -- fork and alarm. # 2000-06-01 Original. ## Modules used ################################################################ use CGI; use English; # allow long English names like $PROGRAM_NAME instead of $0 use POSIX; # for strftime() # TBD: Use Carp so early errors still go to browser. # Use only while debugging (due to major performance hit): #use diagnostics; # turn on -w and output verbose versions of warnings ## Constants ################################################################### $FIRST_LINE_OF_NSI_DISCLAIMER_REGEXP = "^NOTICE AND TERMS OF USE: You are not"; $LAST_LINE_OF_NSI_DISCLAIMER_REGEXP = "^Network Solutions reserves the right"; $NSI_WHOIS_SERVER = "whois.networksolutions.com"; $TOP_LEVEL_US_WHOIS_SERVER = "whois.internic.net"; ## Initializations ############################################################# $CGI::DISABLE_UPLOADS = 1; $CGI::POST_MAX = 1024; $OUTPUT_AUTOFLUSH = 1; # let the browser render as dynamically as possible # Un-taint the $PATH before we try to run programs. $ENV{PATH} = "/bin:/usr/bin:/usr/lib:/usr/local/bin:/usr/sbin"; open(STDERR, ">&" . STDOUT); # duplicate stderr to stdout so we see errors $format = "html"; # output format (can also be "text" on the results page) $meta_tags = {Generator => "whois_nameserver_users version 2000-06-07, by\n" . 'Dan Harkless <software@harkless.org> --' . ' http://harkless.org/dan/software/'}; @start_html_params = (-dtd => "-//W3C//DTD HTML 3.2//EN", -meta => $meta_tags); undef $email; ## Subroutines ################################################################# sub die_pre_html_start { print $cgi->header; # Strip off the leading path from the script name. ($program_name_no_path = $PROGRAM_NAME) =~ s<.*/><>; push @start_html_params, (-title => "ERROR from $program_name_no_path"); print $cgi->start_html(@start_html_params); print $cgi->h1("ERROR from $program_name_no_path:"); print @ARG; print $cgi->end_html, "\n"; exit 1; } sub die_post_html_start { # Strip off the leading path from the script name. ($program_name_no_path = $PROGRAM_NAME) =~ s<.*/><>; heading(2, "ERROR from $program_name_no_path:"); print @ARG; if ($format eq "html") { print $cgi->end_html; } print "\n"; exit 1; } sub heading { $level = shift; if ($format eq "html") { # I know there's a better way to do this (with references, somehow), but # I don't have time to figure it out right now. if ($level == 1) { print $cgi->h1(@ARG); } elsif ($level == 2) { print $cgi->h2(@ARG); } elsif ($level == 3) { print $cgi->h3(@ARG); } else { print @ARG; # fail gracefully } } else { # $format eq "text" print @ARG; } print "\n"; } sub hr { if ($format eq "html") { print $cgi->hr; } else { # $format eq "text" print "\n"; for ($column = 1; $column <= 80; $column++) { print "-"; } print "\n"; } print "\n"; } sub html_escape { if ($format eq "html") { # This is a copy & paste of CGI.pm's internal escapeHTML() routine # except that I've changed '"' to escape as '"'; instead of '"' # since the latter was accidentally left out of the HTML 3.2 DTD, making # http://validator.w3.org/ complain about pages that have transformed # strings like 'Vinyl 12" Single'. my($self,$toencode) = @_; $toencode = $self unless ref($self); return undef unless defined($toencode); return $toencode if ref($self) && $self->{'dontescape'}; $toencode=~s/&/&/g; $toencode=~s/\"/&\#34;/g; $toencode=~s/>/>/g; $toencode=~s/\n"; } } sub pre_open { if ($format eq "html") { print "
\n";
    }
}

sub section_heading {
    if ($format eq "html") {
	print "

\n"; print "\n

"; } print shift, "."; if ($format eq "html") { print "  

"; } else { # $format eq "text" print " "; } heading(2, @ARG); if ($format eq "html") { print "

"; } print "\n"; } ## Parameters ################################################################## $cgi = new CGI; if ($cgi->cgi_error()) { # Surprisingly, current browsers (as of 2003) don't respond properly to HTTP # status code 413, so we'll generate an HTML error page rather than calling # $cgi->header($cgi->cgi_error()). die_pre_html_start($cgi->cgi_error()); } @param_names = $cgi->param; if (scalar @param_names == 0) { $mode = "query"; $title = "WHOIS Nameserver Users Query"; } else { $mode = "results"; if ($param_names[0] eq "keywords") { # We were called from an or on the commandline w/o '='s. @keyword = $cgi->keywords; # get searchpart of URL / commandline params if (scalar @keyword != 1) { die_pre_html_start("One domain name parameter is required."); } $domain_param = $keyword[0]; } else { # We're the target of a GET or POST form. if (defined $cgi->param("domain")) { $domain_param = $cgi->param("domain"); } else { die_pre_html_start("Missing domain=value."); } if (defined $cgi->param("email") and $cgi->param("email") ne "") { $email = $cgi->param("email"); # else we output to stdout if (defined $cgi->param("format")) { $email_format = $cgi->param("format"); } else { $email_format = "html"; } } elsif (defined $cgi->param("format")) { $format = $cgi->param("format"); # else default to html per above } if (defined $cgi->param("timeout")) { $timeout = $cgi->param("timeout"); } else { $timeout = 60; } } # Un-taint the domain name by allowing only alphanumeric, hyphen, and period # characters. if ($domain_param =~ m<^([-\w.]+)$>) { # Domain name has no illegal characters. $domain_param = $1; } else { die_pre_html_start("Illegal domain name '", $domain_param, "'. ", " Only alphanumeric, hyphen, and period", " characters are allowed in domain names."); } $title = "WHOIS Nameserver Users Results For $domain_param"; } ## HTML start ################################################################## if ($format eq "html") { print $cgi->header; push @start_html_params, (-title => $title); print $cgi->start_html(@start_html_params), "\n"; } else { # $format eq "text" print $cgi->header("text/plain"); } ## Body ######################################################################## heading(1, $title); if ($mode eq "query") { print $cgi->p("Enter a domain name. We'll look it up at\n", "$TOP_LEVEL_US_WHOIS_SERVER to see if it's\n", "registered through Network Solutions. If it is, we can\n", "take this domain's nameservers and get a list of\n", "other domains that use those same servers.\n", "Unfortunately, as of this writing, we can only get 50\n", "domains per server (though luckily slightly different\n", "results are usually returned for primary vs. secondary\n", "(and tertiary...) nameservers, so we can get above the 50\n", "limitation)."), "\n"; print $cgi->p("For each domain, we'll do an MX lookup\n", "to see if they seem to have a dedicated mailserver."), "\n"; print $cgi->p("Also, for each domain, we'll\n", "traceroute to www.domain.\n", "Upon careful examination, this can tell us if a\n", "particular domain has their Internet connection through\n", "the provider we're checking out, whether they have a\n", "web server online, and whether it's a dedicated", "machine."), "\n"; print '
', "\n"; print $cgi->p('Domain to check (do not prepend "', 'www."):', "\n", ''), "\n"; print $cgi->p("Email address to send results to (leave blank to output to", " screen):\n", ''), "\n"; print $cgi->p("Give up on network commands (currently just", " traceroute) after:\n", ' seconds' ), "\n"; print $cgi->p("Output format:
\n", 'HTML' . "  Plain text" . ''),"\n"; print $cgi->p(""), "\n"; print "
\n"; } else { # $mode eq "results" if (defined $email) { print $cgi->p("Results will be emailed to $email when complete.\n", "Please be patient -- this may take awhile (up to\n", "$timeout seconds times the number of domains found\n", "(which could be well over 50) plus time for the\n", "nslookup and whois calls."), "\n"; print $cgi->p("You may go back to the previous page to issue another\n", "simultaneous query, but doing too many at once may\n", "reduce the quality of the results (more timeouts may\n", "occur)."), "\n"; print $cgi->hr; print "This query acknowledgment generated " . strftime("%A, %B %e, %Y at %I:%M %p", localtime), ".\n"; if ($emailing_child_pid = fork) { # We're the parent -- exit so the webserver will know we're done # and output to the client. # TBD: Keep watching to see whether the child's done and output '.'s # or something until it is to keep the connection to the browser # from timing out. exit; } elsif (not defined $emailing_child_pid) { die_post_html_start("Can't fork(): $OS_ERROR."); } # Otherwise, we're the child -- go on about our business. close STDERR; # need to close both of these or we have inherited close STDOUT; # connections to the webserver, which will wait for input open STDOUT, "| sendmail -t" or die "Can't fork(): $OS_ERROR."; open(STDERR, ">&" . STDOUT); # dup stderr to stdout so we see errors print "To: $email\n"; print "Subject: $title\n"; $format = $email_format; if ($format eq "html") { print "Content-Type: text/html\n\n"; print $cgi->start_html(@start_html_params), "\n"; } print "\n"; heading(1, $title); } hr(); open WHOIS, "whois --version |"; $whois_version = ; close WHOIS; if ($whois_version =~ /jwhois/) { $no_redirect = "-n"; } else { $no_redirect = ""; } open WHOIS, "whois -h $TOP_LEVEL_US_WHOIS_SERVER $domain_param $no_redirect |" or die_post_html_start("Can't fork(): $OS_ERROR."); section_heading("A", "$TOP_LEVEL_US_WHOIS_SERVER\'s info on $domain_param"); pre_open(); while () { if (/No match/i) { $no_match = 1; } elsif (/Name Server:[\s]*([-\w.]+)/i) { push @name_servers, $1; } elsif (/Whois Server:[\s]*([-\w.]+)/i) { $whois_server = $1; } print html_escape($ARG); } pre_close(); # Can't die because whois always returns 255 on boneheaded AIX. close WHOIS or warn("whois failed with OS error \"$OS_ERROR\"", " and exit code ", ($CHILD_ERROR>>8)); if ($no_match) { die_post_html_start("No information for this domain. Either it", " doesn't exist, or it's an international domain.", " Either way, we can't go on."); } elsif ($whois_server ne $NSI_WHOIS_SERVER) { print "\n"; print "\n"; print $cgi->td("This domain appears to be registered through", " another registrar besides Network Solutions. Only", " Network Solutions is known to implement the", " non-standard WHOIS commands that allow our nameserver", " user query. We'll push on a bit more, but we'll", " probably fail."); print "\n
WARNING:  
\n"; } hr(); section_heading("B", "$whois_server\'s info on $domain_param\'s nameservers"); $i = 1; $name_servers_count = scalar @name_servers; foreach $name_server (@name_servers) { heading(3, "Nameserver $i / $name_servers_count: $name_server"); open WHOIS, "whois -h $whois_server 'host $name_server' |" or die_post_html_start("Can't fork(): $OS_ERROR."); pre_open(); while () { # Don't keep printing out Network Solutions' legal disclaimer. if (/$FIRST_LINE_OF_NSI_DISCLAIMER_REGEXP/) { $in_network_solutions_legal_disclaimer = 1; } if ($in_network_solutions_legal_disclaimer) { if (/$LAST_LINE_OF_NSI_DISCLAIMER_REGEXP/) { $in_network_solutions_legal_disclaimer = 0; } next; } if (/([-\w]+-HST)\)$/i) { $name_server_handles{$name_server} = $1; } print html_escape($ARG); } pre_close(); # Can't die because whois always returns 255 on boneheaded AIX. close WHOIS or warn("whois failed with OS error", " \"$OS_ERROR\" and exit code ", ($CHILD_ERROR>>8)); if ($format eq "text") { print "\n"; } $i++; } hr(); section_heading("C", "$whois_server\'s info on domains using", " $domain_param\'s individual\n nameservers"); $i = 1; foreach $name_server_handle (sort values %name_server_handles) { heading(3, "Nameserver handle $i / $name_servers_count:", " $name_server_handle"); print "\n"; open WHOIS, "whois -h $whois_server 'server $name_server_handle' |" or die_post_html_start("Can't fork(): $OS_ERROR."); pre_open(); while () { # Don't keep printing out Network Solutions' legal disclaimer. if (/$FIRST_LINE_OF_NSI_DISCLAIMER_REGEXP/) { $in_network_solutions_legal_disclaimer = 1; } if ($in_network_solutions_legal_disclaimer) { if (/$LAST_LINE_OF_NSI_DISCLAIMER_REGEXP/) { $in_network_solutions_legal_disclaimer = 0; } next; } if (/\(([-\w]+-DOM)\)/) { if (undef $domain_handles{$1}) { $domain_handles{$1} = 1; } } print html_escape($ARG); } pre_close(); # Can't die because whois always returns 255 on boneheaded AIX. close WHOIS or warn("whois failed with OS error", " \"$OS_ERROR\" and exit code ", ($CHILD_ERROR>>8)); $i++; print "\n\n"; } hr(); section_heading("D", "Info for each unique domain (sorted by domain", " handle) that's listed on one of\n the nameservers"); $i = 1; $domain_handles_count = scalar keys %domain_handles; foreach $domain_handle (sort keys %domain_handles) { heading(3, "Domain handle $i / $domain_handles_count:", " $domain_handle"); open WHOIS, "whois -h $whois_server !$domain_handle |" or die_post_html_start("Can't fork(): $OS_ERROR."); pre_open(); while () { # Don't keep printing out Network Solutions' legal disclaimer. if (/$FIRST_LINE_OF_NSI_DISCLAIMER_REGEXP/) { $in_network_solutions_legal_disclaimer = 1; } if ($in_network_solutions_legal_disclaimer) { if (/$LAST_LINE_OF_NSI_DISCLAIMER_REGEXP/) { $in_network_solutions_legal_disclaimer = 0; } next; } if (/Domain Name:[\s]*([-\w.]+)/i) { $domain_name = $1; } print html_escape($ARG); } pre_close(); # Can't die because whois always returns 255 on boneheaded AIX. close WHOIS or warn("whois failed with OS error", " \"$OS_ERROR\" and exit code ", ($CHILD_ERROR>>8)); open NSLOOKUP, "nslookup -querytype=MX $domain_name 2>/dev/null |" or die_post_html_start("Can't fork(): $OS_ERROR."); pre_open(); print "\nMX records for $domain_name: "; $MX_record_count = 0; undef $dedicated_mail_server; while () { if (/preference = ([0-9]+), mail exchanger = ([-\w.]+)/) { $preference = $1; $exchanger = $2; if ($MX_record_count == 0) { print "\n"; } print " Preference $preference: $exchanger\n"; if ($exchanger =~ /$domain_name/i) { $dedicated_mail_server = $exchanger; } $MX_record_count++; } } close NSLOOKUP; # don't really care if nslookup has an error return if ($MX_record_count == 0) { print "none\n"; } else { # There were some MX records -- let's make guesses based on them. print "\n"; if (defined $dedicated_mail_server) { print " Looks like they might have a dedicated mailserver:\n"; print " Name = $dedicated_mail_server\n"; open NSLOOKUP, "nslookup $dedicated_mail_server 2>/dev/null |" or die_post_html_start("Can't fork(): $OS_ERROR."); $dedicated_mail_server_address = "?"; while () { if (/Address:[\s]*([0-9.]+)/) { $dedicated_mail_server_address = $1; } } close NSLOOKUP; print " IP address = $dedicated_mail_server_address\n"; if ($dedicated_mail_server_address ne "?") { open NSLOOKUP, "nslookup $dedicated_mail_server_address 2>/dev/null |" or die_post_html_start("Can't fork(): $OS_ERROR."); $dedicated_mail_server_canon_name = "[not set up]"; while () { if (/Name:[\s]*([-\w.]+)/) { $dedicated_mail_server_canon_name = $1; } } close NSLOOKUP; print " Reverse lookup = " . "$dedicated_mail_server_canon_name\n"; } } else { print " Looks like there isn't a dedicated mailserver for" . " this domain.\n"; } } print "\n\n"; pre_close(); if ($tracerouting_child_pid = open TRACEROUTING_CHILD, "-|") { # We are the parent. pre_open(); $aborted = 0; $final_hop_number = 0; $final_host = ""; $unknown = 0; while () { if (/aborting\]/) { $aborted = 1; } elsif (/unknown host/) { $unknown = 1; } elsif (/([0-9]+)[\s]+([-\w.]+) \(/) { $final_hop_number = $1; $final_host = $2; } print html_escape($ARG); } if ($final_hop_number != 30 and not $aborted and not $unknown) { print "\n"; if ($final_host =~ /$domain_name/i) { print " Looks like they have a dedicated webserver.\n"; } else { print " Looks like there isn't a dedicated webserver for" . " this domain.\n"; } } print "\n\n"; pre_close(); # Don't check the return code -- the child already did any necessary # error reporting. close TRACEROUTING_CHILD; } elsif (defined $tracerouting_child_pid) { # We are the child. sub catch_timeout { print "[traceroute not finished after $timeout" . " seconds -- aborting]\n"; exit; } $SIG{ALRM} = \&catch_timeout; # Need to timeout the traceroute eventually, or the web browser may # timeout. alarm $timeout; open TRACEROUTE, "traceroute www.$domain_name |" or die("Can't fork(): $OS_ERROR"); while () { print; } # Don't `alarm 0' here in case the close can hang... # Hopefully in all cases where traceroute fails, it'll output an # error. Don't output a second one -- we don't care what its exit # code was. close TRACEROUTE; exit; } else { print "Can't fork(): $OS_ERROR.\n"; # don't die; this isn't critical } print "\n"; $i++; } } ## Footer ###################################################################### hr(); print "This "; if ($mode eq "query") { print "query form"; } else { # $mode eq "results" print "report"; } print " generated ", strftime("%A, %B %e, %Y at %I:%M %p", localtime), ".\n"; ## HTML end #################################################################### if ($format eq "html") { print $cgi->end_html, "\n"; }