#!/usr/bin/perl # # gen_form_search_db # # AUTHOR: # Dan Harkless http://harkless.org/dan/software/ # # COPYRIGHT: # This file is Copyright (C) 2006 by Dan Harkless, and is released under the # GNU General Public License . # # DESCRIPTION: # Depending on how it is called, either displays, generates a search form for, # or searches a database in my proprietary plain-text format. # # What is my proprietary database format? Well, it's not too exotic. The # database itself (a file ending in ".data") is a standard TAB-separated # "flat-file" database. Each line corresponds to a record and each # TAB-separated string corresponds to a field. For efficiency, the database # is assumed to be already sorted in the default sort order -- we only # explicitly sort it if a non-standard sort order is chosen. Therefore, be # sure to run it through something like the UNIX sort(1) command after adding # rows. # # What makes my format interesting is the companion "description" file. If # you have a database called "books.data", you also need a description file # called "books.desc". The first line of that file is the name of the # database. The lines following that describe each field in the database. # # There are three different kinds of fields, "text", "html", and "enum". Say # the first field in the database is a "text" field. In this case, the line # following the title line in the .desc file will be the string "text", # followed by a TAB, then the name of the field. On the next line, there will # be an initial TAB followed by an integer, which tells this script how many # columns to make the text entry box for this field on the search form. A # text entry box is used because "text" fields can contain any random text. # # "html" fields are just like "text" fields except that they're not subjected # to HTML special character escaping. So if you want to use HTML tags in a # field, make it an "html" field. Otherwise, a string like # 'Douglas Adams' will be output as # that literal string, rather than as an active hyperlink. # # "enum" fields in the database, on the other hand, may contain only one of # the enumerated allowed values. enum field descriptions consist of the # string "enum", followed by a TAB, then the name of the field, as before. # Each subsequent line, however, consists of an initial TAB and then one of # the legal enumerated values (hopefully in alphabetic or some other logical # order). These enumerated values come out as individual check boxes on the # search form. # # How 'bout an example? Here's a toy "books.desc" (imagine there is no # initial "# " string on each line): # # Dan Harkless' Book Collection # enum Category # Biography # Sci-Fi # text Author(s) # 15 # text Title # 15 # # The corresponding "books.data" file would look something like this: # # Biography Lindskold, Jane M. Roger Zelazny # Sci-Fi Abbott, Edwin A. Flatland # Sci-Fi Zelazny, Roger To Die In Italbar # # Note that "To Die In Italbar" is over 15 characters. That's okay -- the # "15" only sets the width of the text entry field on the search form, but you # can type more text than that -- you just won't be able to see it all at # once. # # Besides the database field description lines, it's also possible to set # search parameter defaults in the .desc file. The names of the parameters # correspond to the NAMEs of the corresponding INPUTs (minus the # "gen_form_search_db::" prefix -- you can check out the HTML we generate to # see what I'm talking about here). For instance, the .desc file line: # # repeat_headings 10 # # will cause the "Repeat headings every: [__] record(s)" box to default to # "10" rather than the usual value of "0". # # Also, there are three parameters you can optionally set in the .desc file # that affect how the page is displayed. The first is "background", and will # cause the URL specified to be placed in the BACKGROUND= attribute of the # tag: # # background seamless_pattern_23.jpg # # The second, "favicon", will cause the URL specified to be placed in a tag in the section: # # favicon /favicon.ico # # The third, "footer", is a bit more complex. It specifies HTML to use as the # footer of the page. However, it doesn't actually represent the entire # footer, just the user-modifiable portion. The default footer (if there is # no user footer specified) consists of an
followed by a with # "Database last modified: " left-justified on the left-hand side and # "Validated HTML 4.01 Transitional" (linked to validator.w3.org) and # "Generator: gen_form_search_db" (linked to the anchor of this script on my # software web page) right-justified on the right-hand side. If there's a # user footer specified, its contents will be placed on the left-hand side # prior to the database timestamp. # # "footer", if it is specified, must be the final parameter in the .desc file. # This is because the HTML may either be specified as one long line after # "footer" and a TAB: # # footer Up to Parent # # *or* as multiple successive lines after "footer" and a newline: # # footer # Up # # Here's the above toy example repeated, but with all of these optional # parameters specified: # # Dan Harkless' Book Collection # enum Category # Biography # Sci-Fi # text Author(s) # 15 # text Title # 15 # interpret_strings regexp # case_sensitive 1 # one_record_per column # repeat_headings 10 # background seamless_pattern_23.jpg # favicon /favicon.ico # footer # Up # # (Note that the old way to specify the background graphic and footer was via # dedicated "background.url" and "footer.html" files, but this support has # been removed to minimize unnecessary filesystem I/O.) # # Okay, after all this blathering, you're no doubt interested to know how to # call the script. Let's take the example of database files called # /collections/music/CDs.{data,desc}, with the script located at # /collections/gen_form_search_db.cgi. There are actually 4 # different ways in which you can choose to call gen_form_search_db on your # database: # # 1. http://wwwhost/collections/gen_form_search_db.cgi?music/CDs # # When passing the database path (note we don't include a trailing ".data" # or ".desc" -- those suffixes will get appended when opening the # appropriate files) as a query string (HTTP GET method), # gen_form_search_db goes into "gen_form" mode. In this mode, it merely # displays a search form. Once you submit the form (you can submit without # changing any search parameters to get the whole database in the default # sort order), the script will get called via method #2, below, and the # data will be displayed. # # 2. http://wwwhost/collections/gen_form_search_db.cgi # (POST parameters include "gen_form_search_db::database=music/CDs") # # gen_form_search_db gets called like this when the search form is # submitted. This is "search_db" mode. The POSTed parameters determine # what subset of the database data is displayed, and in what order. Below # the table of search results is another copy of the search form, so you # can try a new search without having to back up in your browser history. # # Since this mode uses POST by default, you can't link directly to # particular search results from another web page. Actually, though, the # mode can also work using HTTP GET -- this is how the validator.w3.org # link in the footer is able to function. Linking to particular search # results that way is not recommended, though, since the URL would be # invalidated next time the .desc file was updated. # # 3. http://wwwhost/collections/gen_form_search_db.cgi/collections/music/CDs # # When passing the database path (which must be absolute -- note the # repetition of "/collections" here) to the script using the so-called # "PATH_INFO" part of the URL, gen_form_search_db goes into "display_db" # mode. The entire database is displayed, followed by the search form. # # This calling style and #4 below (which also goes into "display_db" mode) # are useful when you want the entry point to your database be a display of # the data itself, rather than just a search form which requires hitting # the submit button before data will appear. # # Note that normal search engines do not do any form submissions, so if you # want your database to be indexed, you'll have to make a "display_db" mode # link to it. Most search engines also do not index pages that they think # are CGI-generated, which is why we avoid using a '?' character in the URL # in this mode and the next one. # # A sneaky feature you can use in this mode is overriding .desc-settable # search parameters by passing them in a GET query. One case where this is # definitely useful is to override a nonzero "repeat_headings" from the # .desc if you're using an external frame-generating script (like my # "frame_generator") to keep the table headings onscreen at all times. For # example: # # http://wwwhost/collections/gen_form_search_db.cgi/collections/music/CDs?g # en_form_search_db::repeat_headings=0 # # 4. http://wwwhost/collections/music/CDs/ # # As mentioned in #3 above, this URL style will also put gen_form_search_db # in "display_db" mode. Note that this method assumes a slightly different # file layout than the above examples. Rather than there being # /collections/music/CDs.{data,desc} files, there are # /collections/music/CDs/{data,desc} files. Also, we have # /collections/music/CDs/index.cgi -- a symlink or hard link to # gen_form_search_db's real location (or a copy of it, if you're # unfortunate enough not to be able to make links on your system). # # To get this method to work, you need some cooperation from your # webserver. On Apache, you need to make sure this directory has "Options # FollowSymLinks" (assuming you're not using a hard link or copy), plus the # server's "DirectoryIndex" directive will need to include "index.cgi". # # If you want to use the implicit "./data" and "./desc" database file # locations but you don't want to have "index.cgi" in your "DirectoryIndex" # list for some reason, you can mention the script name explicitly, as in: # # http://wwwhost/collections/music/CDs/gen_form_search_db.cgi # # That will still require you to have a gen_form_search_db.cgi link or copy # in each directory containing a "data" / "desc" file pair, of course. # # As in the PATH_INFO version of display_db mode, you can override .desc- # set search parameters if you need to: # # http://wwwhost/collections/music/CDs/?gen_form_search_db::repeat_headings # =0 # # For examples of this script in action, please see the links in the # gen_form_search_db description on my Software page (URL at the top of the # file). # # VERSION: $version = "2006-12-20"; # # DATE MODIFICATION # ========== ================================================================== # 2006-12-20 Rearranged code so we could put out the newest of the modification # times of the 'data' file, the 'desc' file, and this script as the # Last-Modified: header. This, plus Apache's automatic handling of # If-Modified-Since:, will result in cross-session cacheability and # bandwidth savings. The user-visible "Database last modified:" # date in the footer remains based solely on the modification time # of the 'data' file. # 2006-12-20 Use the new 'use warnings' rather than -w on the shebang line so # we only output warnings in this script, not in modules we use. # To do this in a backwards-compatible way, the if.pm module # (available from CPAN) is now required. # 2006-12-11 Changed the hardcoded HTML type from 3.2 to 4.01 Transitional to # accomodate my new footers that use JavaScript-obfuscated mailto: # links. TBD: Make the doctype a parameter in the desc file? # 2006-04-07 To minimize unnecessary filesystem I/O and consolidate all the # settings into one place, removed support for the background.url # and footer.html files in favor of 'background' and 'footer' params # in the .desc file. Also added a 'favicon' parameter. # 2006-02-16 Removed \n from Generator -- Firefox displays it as a funky glyph. # 2005-05-19 perldoc.com has been down for a while, and may not come back up. # Changed http://www.perldoc.com/latest/pod/perlre.html link to # http://perldoc.perl.org/perlre.html. # 2003-06-29 Output the time as of a minute ago as the Last-Modified header. # 2003-06-29 Allowed override of .desc settings (e.g. to overcome nonzero # repeat_headings when using frames) using GET in display_db mode. # 2003-06-29 Added display_db mode, triggered via a database specified either # implicitly or using PATH_INFO. Can be spidered by search engines. # 2003-06-28 Search results page now duplicates the search form at the bottom, # with the fields filled in per the search that was just done. # 2003-06-28 Removed the UP_URL{} processing in the footer, since after # my upcoming changes, the search form will no longer logically be # the parent of a given results page. Multiple consecutive searches # should be thought of as lateral motion, with the up arrow allowing # you to go directly to the parent page that called # gen_form_search_db, without having to hit Back multiple times. # 2003-06-28 Added ability to change search parameter defaults in the .desc. # 2003-03-15 Use CGI.pm's DISABLE_UPLOADS and POST_MAX variables to protect # against DoS attacks. POSTs of more than 1 MiB will error. # 2003-03-10 Using $cgi->td() on database field name "Copy & paste" was causing # double-HTML-escaping and an error. Now rather than calling html_ # escape() when we read the .desc file, we just call it on output, # if needed (not needed if output via CGI.pm's routines, like td()). # 2003-02-10 Now warn if a line in the database contains too many fields. # 2003-01-01 Added "Repeat headings every: records" feature. # 2002-12-17 Now warn about illegal enum values as well as null ones. # 2002-12-06 Instructions at the top of the form needed to be updated to # reflect the recently-added capabilities. Also moved $version # setting to up above to help prevent forgetting to update it. # 2002-11-22 Made the "case-sensitive" checkbox apply not just to the search # strings, but also to database sorts. # 2002-11-21 Added a new field type, "html", which is just like "text" except # that special HTML characters don't get escaped on output (e.g. # '<' and '>' don't get output as "<" and ">"). # 2002-11-21 Added a new radio button to the search form, "Output one record # per: (O) row ( ) column". It does what it says. # 2002-11-21 That still wasn't enough to generate legal HTML 3.2. CGI.pm 2.69 # through 2.88 unconditionally generate a lang= attribute on the # tag, and there's no such thing in HTML 3.2. Patched my # copy of CGI.pm to not generate the attribute if -lang=>"" is # passed to start_html(). Sent the patch to Lincoln Stein, so # hopefully it'll be present in 2.89 and on. # 2002-11-21 CGI.pm versions 2.69 and above require the new -no_xhtml pragma or # validator.w3.org will not validate the output as HTML 3.2. # 2002-11-21 Need to check in sort_rows() if the fields are defined before # comparing them. For instance, if there's no data for the last # field in a record, the database owner might forget a trailing TAB. # 2002-02-27 The actual searching and sorting features were TBDs until now -- # the entire database was always displayed, in default sort order. # 2002-02-22 Now instead of blindly cat'ing the footer.html file, process it, # transmogrifying the string UP_URL{} as per above description. # 2002-02-18 Previously only worked with relative paths to the database, but # that kind of sucks for people forced to use a global /cgi-bin dir # rather than being able to put the script near the database. Got # absolute paths working. While doing this, removed taint checking, # since it wasn't doing anything useful -- just restricting the # characters usable in pathnames (we don't use any subshells, so # there isn't that concern). # 2002-02-18 Use $OS_ERROR rather than rolling our own dir/file access errors. # 2002-02-16 Added "charset=ISO-8859-1" to the HTTP header to eliminate # validator.w3.org warning. If that ever causes a problem for # someone who wants to use this script with a different character # set, I'll add a way to customize it via file or environment. # 2002-02-16 Added "Validated HTML 3.2" and "Generator: gen_form_search_db" # links right-justified in footer; added live example link above. # 2000-06-01 No longer consider .cgi part of the name of the script. In some # installations it might have a different extension or none at all. # 2000-05-05 In the past, CGI.pm didn't initialize $cgi->keywords if no # arguments were specified in the searchpart of the URL (as during a # POST), which would result in a warning when diagnostics were # turned on. Therefore, if we wanted warnings active, we needed to # `use diagnostics' and surround the `@keyword = $cgi->keywords' # with `disable diagnostics' and `enable diagnostics'. # Unfortunately perl v5.6.0 has a bug that produces a diagnostic # when you say `disable diagnostics'. Luckily, the $cgi->keywords # warning no longer seems to appear. We could just leave # `use diagnostics' and get rid of the disable/enable, but a Usenet # thread alerted me to the fact that `use diagnostics' entails a # great startup delay, and I verified this experimentally. # Therefore, just backing off to -w on the shebang line. The # `use diagnostics' below can be uncommented for verbose warnings # when debugging. # 1999-07-07 Use $cgi->url() instead of hardcoding script name in FORM ACTION. # 1999-04-16 Merged generate_search_form and search_database into # gen_form_search_db to eliminate duplicated code w/o making pm. # 1999-03-03 Use latest CGI.pm's new -dtd option to say that we're HTML 3.2. # 1998-07-07 Output Generator META tag with name, version, and author of this # script. Be sure to update the META version below when revising! # 1997-11-25 Original (generate_search_form and search_database). ## Modules used ################################################################ use CGI qw(-no_xhtml); # need -no_xhtml for CGI.pm 2.69+ or get bad HTML verif. use English; # allow long English names like $PROGRAM_NAME instead of $0 use File::Basename; # for fileparse() use POSIX; # for ceil() # Output warnings only for this file, not for any modules we use. Pre-5.6.0 # Perls don't understand this syntax, so we use if.pm (avail. from CPAN) here. use if $] >= 5.006, warnings; ## Subroutines ################################################################# sub die_pre_html_start { print $cgi->header(-type => "text/html; charset=ISO-8859-1"); # Strip off the leading path from the script name. ($program_name_no_path = $PROGRAM_NAME) =~ s<.*/><>; push @start_html_params, (-title => "ERROR from $program_name_no_path"); print $cgi->start_html(@start_html_params); print $cgi->h1("ERROR from $program_name_no_path:"); print @ARG; print $cgi->end_html, "\n"; exit 1; } sub html_escape { # This is a copy & paste of CGI.pm's internal escapeHTML() routine except # that I've changed '"' to escape as '"'; instead of '"' since the # latter was accidentally left out of the HTML 3.2 DTD, making # http://validator.w3.org/ complain about pages that have transformed # strings like 'Vinyl 12" Single'. Also removed the here-meaningless # 'dontescape' checking. my $toencode = shift; return undef unless defined($toencode); $toencode=~s/&/&/g; $toencode=~s/\"/&\#34;/g; $toencode=~s/>/>/g; $toencode=~s/[$sort_position[$i]] and defined $b->[$sort_position[$i]]) { my $string_a = $a->[$sort_position[$i]]; my $string_b = $b->[$sort_position[$i]]; if (not $case_sensitive) { $string_a = lc($string_a); $string_b = lc($string_b); } # TBD: Strip out the HTML markup when sorting so that 'A' and 'B' will properly sort (A, B) # rather than (B, A). As it is now, we need to format the html data # values like 'B (URL)' for correct sorting. The # problem is that even if we implemented this, the sort-lines # command in Emacs would no longer produce the expected results, and # trying to visually determine whether a line was sorted correctly # would be difficult, so we'd pretty much need to implement a DB # editor more specialized than a text editor (but without losing any # of the advantages of using a text editor on the server -- thus it # would need to be a TTY or web application) to be able to take # advantage of this. # if ($fields[$i][1] eq "html") { # # } $potential_result = $string_a cmp $string_b; if ($potential_result != 0) { return $potential_result; } } } return 0; } sub url_escape { # This is a copy & paste of CGI.pm's internal escape() routine, except that # that routine was overly restrictive -- it disallowed [!~*'()] (presumably # because they're shell-special characters) even though they're perfectly # legal to appear unquoted in URLs. I've also caused '/' not to be quoted # since the only way we're going to get it in the URL and have it not mean # the normal URL path separate character is for it to be in the query part # of the URL, where it's legal to appear unescaped. Besides, # ?books-have/fiction reads better than ?books-have%2Ffiction. my $toencode = shift; return undef unless defined($toencode); $toencode =~ s<([^a-zA-Z0-9_.\-!~*\'()/])>eg; return $toencode; } ## Initializations ############################################################# $OUTPUT_AUTOFLUSH = 1; # let the browser render as dynamically as possible $CGI::DISABLE_UPLOADS = 1; $CGI::POST_MAX = 1048576; # 1 MiB $cgi = new CGI; if ($cgi->cgi_error()) { # Surprisingly, current browsers (as of 2003) don't respond properly to HTTP # status code 413, so we'll generate an HTML error page rather than calling # $cgi->header($cgi->cgi_error()). die_pre_html_start($cgi->cgi_error()); } $meta_tags = {Generator => "gen_form_search_db version $version, by" . ' Dan Harkless <software@harkless.org> --' . ' http://harkless.org/dan/software/'}; @start_html_params = (-dtd => "-//W3C//DTD HTML 4.01 Transitional//EN", -lang => "", -meta => $meta_tags); # First override opportunity for these is while we're reading the .desc file. # Second is if we're searching the database and corresponding param. was posted. $case_sensitive = 0; $interpret_strings = "plain"; $one_record_per = "row"; $repeat_headings = 0; ## Required Parameters ######################################################### @keyword = $cgi->keywords; # get searchpart of URL -- won't grab "=" $path_info = $cgi->path_info; # Figure out which mode we're in and get the name of the database. if (@keyword > 0) { # We were called with a database in the searchpart of the URL, so we're in # generate search form mode. $mode = "gen_form"; $database_path = $keyword[0]; } elsif ($path_info) { # We were called with the database in the additional path info part of the # URL, so we're in display database mode. $mode = "display_db"; $database_path = $path_info; } else { # No database in the URL. if ($cgi->param('gen_form_search_db::database')) { # We're being called (presumably by ourselves) in search database mode. $mode = "search_db"; $database_path = $cgi->param('gen_form_search_db::database'); } elsif (-e "data") { # We're implicitly using "data" in the current directory in display # database mode. TBD: Glob for *data so the file can be implicitly # selected yet still be called .data? An apparent 5.8.0 globbing # bug made this fail when I started to implement it. Can work around # with readdir(), but... $mode = "display_db"; $database_path = "./"; # dummy path } else { die_pre_html_start("Database name not provided in URL or", " gen_form_search_db::database POST", ' parameter, and there is no "data" file', " in the current directory."); } } if ($database_path eq "./") { $data_file = "data"; $desc_file = "desc"; } ($database_name, $database_dir) = fileparse $database_path; if (not defined $data_file) { $data_file = "$database_name.data"; $desc_file = "$database_name.desc"; } $chdir_dir = $database_dir; if ($database_dir ne "./") { $base = $cgi->url; if ($database_dir =~ m<^/>) { # Absolute dir specified -- remove all after "protocol://" for # base URL. $base =~ s<^([^/]+//[^/]+).*$><$1>; $document_root = $ENV{DOCUMENT_ROOT}; if (not defined $document_root) { die_pre_html_start("Absolute directory specified yet webserver" . " doesn't set \$ENV{DOCUMENT_ROOT}."); } $chdir_dir = "$document_root/$database_dir"; } else { # Relative dir specified -- for the base, just remove our script name # and any PATH_INFO and let the webserver interpret any '../'s that get # appended to resulting URL. $base =~ s<$path_info$><>; $base =~ s; } # We need to pretend this document was fetched from the specified database # directory so that relative links in the customizable .html and .url files # will work. $base .= $database_dir; # this ends with a '/' push @start_html_params, (-xbase => $base); } ## Input files ################################################################# if (not chdir($chdir_dir)) { die_pre_html_start("\"", html_escape($database_dir), "\": ", html_escape($OS_ERROR), "."); } elsif (not open(DATA, $data_file)) { die_pre_html_start("\"", html_escape($data_file), "\": ", html_escape($OS_ERROR), "."); } elsif (not open(DESC, $desc_file)) { die_pre_html_start("\"", html_escape($desc_file), "\": ", html_escape($OS_ERROR), "."); } else { # Get database modification time. @file_stat = stat DATA; $DATA_mod_time = $file_stat[9]; if ($mode eq "gen_form") { # When just displaying the search form, we're done with the db by this # point. close DATA; } # Look at the modification time of the DESC file and this script itself. # The newest of those two and the DATA file will be output as the # Last-Modified: header. @file_stat = stat DESC; $DESC_mod_time = $file_stat[9]; @file_stat = stat $PROGRAM_NAME; # TBD: handle possible failure here? $script_mod_time = $file_stat[9]; # Get title. chomp($database_title = ); $title = $database_title; if ($mode eq "search_db") { $title .= ": Search Results"; } push @start_html_params, (-title => $title); # Get field descriptions. while () { ($field_type, $field_name_or_value, $newline) = split /[\n\t]/; if ($continuing_enum and $field_type) { # The previous line was the last value for that enum. $fields[$field_count++] = [$continuing_enum, "enum", @enum_values]; undef $continuing_enum; undef @enum_values; } if (not $field_type) { push @enum_values, $field_name_or_value; } elsif ($field_type eq "enum") { $continuing_enum = $field_name_or_value; } elsif ($field_type eq "html" or $field_type eq "text") { ($field_type_next, $field_size, $newline) = split /[\n\t]/, ; if ($field_type_next) { die_pre_html_start("'$field_type' field '", html_escape($field_name_or_value), "' missing size limit."); } $fields[$field_count++] = [$field_name_or_value, $field_type, $field_size]; } elsif ($field_type eq "case_sensitive") { $case_sensitive = $field_name_or_value; } elsif ($field_type eq "interpret_strings") { $interpret_strings = $field_name_or_value; } elsif ($field_type eq "one_record_per") { $one_record_per = $field_name_or_value; } elsif ($field_type eq "repeat_headings") { $repeat_headings = $field_name_or_value; } elsif ($field_type eq "background") { push @start_html_params, (-background => $field_name_or_value); } elsif ($field_type eq "favicon") { push @start_html_params, (-head=>$cgi->Link({-rel=>'icon', -href=>$field_name_or_value})); } elsif ($field_type eq "footer") { $footer = ""; if ($field_name_or_value) { $footer = "$field_name_or_value\n"; } while () { $footer .= $ARG; } } else { die_pre_html_start("Unknown field type '", html_escape($field_type), "'."); } } if ($continuing_enum) { $fields[$field_count++] = [$continuing_enum, "enum", @enum_values]; } close DESC; } ## HTTP header ################################################################# # For the Last-Modified: header, output the newest of the modification # timestamps of the DATA file, the DESC file, and this script. This will allow # caching of our results (when called via GET, at least -- putting out # Last-Modified: for POST results apparently doesn't allow caching but doesn't # seem to cause any harm, either). Note that if you have to upgrade CGI.pm (or # another module) due to a bug, the Last-Modified: header won't reflect this and # clients may still see the old buggy output. You can simply 'touch' this # script in such cases to work around this. # # Surprisingly, we don't need to do our own checking for the If-Modified-Since: # header (or related) and outputting of 304 Not Modified. We can just output # our status 200 as usual, along with the semi-static Last-Modified: timestamp, # and Apache (at least as of 2.0.52 plus Red Hat AS 4 backported patches) will # change that to a 304, when appropriate, for us. # # TBD: Output ETag headers too? Would this require us to avoid colliding with # Apache's own ETag values? $mod_time = $script_mod_time; if ($DATA_mod_time > $mod_time) { $mod_time = $DATA_mod_time; } if ($DESC_mod_time > $mod_time) { $mod_time = $DESC_mod_time; } # Note we can't use POSIX::strftime() because it'll output an illegal date if # the default locale the web server's running under isn't English. use vars qw($yday $isdst); # eliminate warnings ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = gmtime($mod_time); @MON = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); @WDAY = qw(Sun Mon Tue Wed Thu Fri Sat); $Last_Modified = sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT", $WDAY[$wday], $mday, $MON[$mon], $year + 1900, $hour, $min, $sec); print $cgi->header(-type => "text/html; charset=ISO-8859-1", -Last_Modified => $Last_Modified); open(STDERR, ">&" . STDOUT); # duplicate stderr to stdout so we see errors ## HTML start ################################################################## print $cgi->start_html(@start_html_params); ## Body ######################################################################## print "\n", $cgi->h1($database_title), "\n"; if ($mode eq "display_db" or $mode eq "search_db") { # Search / display the database. if ($mode eq "search_db") { print "

Search Results

\n"; } # Get the non-database-specific search parameters. If we're in search_db # mode, we overwrite $case_sensitive etc. unconditionally, so that when # those parameters are changed from the .desc defaults, the overriding will # happen correctly. In the display_db case we only overwrite # $case_sensitive etc. if they are explicitly set (e.g. someone passed # "gen_form_search_db::one_record_per=0" in the query section of the URL, # perhaps because they were using an external frame-generator script to # achieve always-onscreen column headings). if ($mode eq "search_db" or defined $cgi->param("gen_form_search_db::case_sensitive")) { $case_sensitive = $cgi->param("gen_form_search_db::case_sensitive"); } if ($mode eq "search_db" or defined $cgi->param("gen_form_search_db::interpret_strings")) { $interpret_strings=$cgi->param("gen_form_search_db::interpret_strings"); } if ($mode eq "search_db" or defined $cgi->param("gen_form_search_db::one_record_per")) { $one_record_per = $cgi->param("gen_form_search_db::one_record_per"); } if ($mode eq "search_db" or defined $cgi->param("gen_form_search_db::repeat_headings")) { $repeat_headings = $cgi->param("gen_form_search_db::repeat_headings"); } # Get the database-specific search parameters. foreach $i (0 .. $field_count - 1) { undef @param_vals; # assigning undef to defined var doesn't undef it if ($mode eq "search_db") { @param_vals = $cgi->param($fields[$i][0]); } else { # $mode eq "display_db" if ($fields[$i][1] eq "enum") { @param_vals = @{$fields[$i]}[2 .. $#{$fields[$i]}]; } else { @param_vals = (""); # simulate empty text entry field } } if ($fields[$i][1] eq "enum" and not defined @param_vals) { print "

ERROR: You unchecked all possible values for\n", "the \"", html_escape($fields[$i][0]), "\" field, which will always result in no\n", "match. You need to leave at least one value checked.

\n"; $doomed_to_failure = 1; } else { $parameters[$i] = [@param_vals]; $sort_order_param_name = "gen_form_search_db::" . $fields[$i][0] . " sort"; if ($mode eq "search_db") { $this_field_sort_order = $cgi->param($sort_order_param_name); } else { # $mode eq "display_db" $this_field_sort_order = $i + 1; } if (not defined $this_field_sort_order) { print "

ERROR: ", html_escape($sort_order_param_name), "\n", "parameter not specified.

\n"; $doomed_to_failure = 1; } else { if ($this_field_sort_order != 0) { $at_least_one_nonzero_sort = 1; if (defined $sort_position[$this_field_sort_order]) { print "

ERROR: Multiple fields given sort\n", "order $this_field_sort_order. Only one field per\n", "sort position, please.

\n"; $doomed_to_failure = 1; } } $sort_position[$this_field_sort_order] = $i; } } } if (not $at_least_one_nonzero_sort) { print "

ERROR: \"Don't Show\" was selected for all fields,\n", "which of course will result in no output.

\n"; $doomed_to_failure = 1; } if (not $doomed_to_failure) { if ($case_sensitive) { $match_options = ""; } else { $match_options = "(?i)"; } $row_number = 0; while () { $row_number++; @row_elements = split /[\n\t]/; if (scalar(@row_elements) > $field_count) { push @rows_with_too_many_fields, $row_number; } # See if this record matches (or, rather, fails to match). $match = 1; foreach $i (0 .. $field_count - 1) { if ($fields[$i][1] eq "enum") { if (not defined $row_elements[$i] or $row_elements[$i] eq "") { # Enum value is null, which is illegal. push @rows_with_null_enum_values, $row_number; $match = 0; last; } else { $matched_enum_val = 0; foreach $requested_enum_val (@{$parameters[$i]}) { if ($requested_enum_val eq $row_elements[$i]) { $matched_enum_val = 1; last; } } if (not $matched_enum_val) { $match = 0; $is_a_legal_value = 0; foreach $legal_value (@{$fields[$i]} [2 .. $#{$fields[$i]}]) { if ($row_elements[$i] eq $legal_value) { $is_a_legal_value = 1; last; } } if (not $is_a_legal_value) { push @rows_with_illegal_enum_values, $row_number; } last; } } } else { # $fields[$i][1] eq "html" or $fields[$i][1] eq "text" if ($parameters[$i][0] ne "") { # The user entered some text in this field -- it's not a # "don't care" item. if (not defined $row_elements[$i]) { # A regexp can match a null value, so if there is no # data in this element, we need to use the null # string. $row_element = ""; } else { $row_element = $row_elements[$i]; } if ($interpret_strings eq "plain") { $match_string = quotemeta($parameters[$i][0]); } else { $match_string = $parameters[$i][0]; } # TBD: Would be good to use eval to catch user regexp # errors here and make sure they get properly # HTML-formatted, but I believe eval can be a security # hole when it contains an arbitrary user input, as # regexps must be. Of course without doing HTML # escaping, cross-site scripting attacks become # possible, but gen_form_search_db is unlikely to be # used on sites that utilize cookies, and search results # pages are not designed to be linked to using GET # queries. if (not $row_element =~ m/$match_options$match_string/){ $match = 0; last; } } } } if ($match) { push @matching_rows, [@row_elements]; } } if (scalar @matching_rows) { print "
\n"; if ($one_record_per eq "column") { # Originally we only supported one record per row, so the # variables are named accordingly. Now we have this optional # one-record-per-column view, though. $nonstandard_sort_order = 0; undef $previous_sort_position; foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { if (defined $previous_sort_position and $previous_sort_position != $sort_position[$i] - 1) { $nonstandard_sort_order = 1; } $previous_sort_position = $sort_position[$i]; } } # For efficiency, we assume the database is already in the # default sort order, so we only need to sort for non-standard # sort orders. if ($nonstandard_sort_order) { @matching_rows = sort sort_rows @matching_rows; } foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { $output_rows_since_last_heading = 0; print "\n"; print "\n"; foreach $row (@matching_rows) { print "\n"; $output_rows_since_last_heading++; if ($repeat_headings > 0 and $output_rows_since_last_heading == $repeat_headings) { print "\n"; $output_rows_since_last_heading = 0; } } print "\n"; } } } else { # The normal case of one record per row. print "\n"; $nonstandard_sort_order = 0; undef $previous_sort_position; foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { print "\n"; if (defined $previous_sort_position and $previous_sort_position != $sort_position[$i] - 1) { $nonstandard_sort_order = 1; } $previous_sort_position = $sort_position[$i]; } } print "\n"; # For efficiency, we assume the database is already in the # default sort order, so we only need to sort for non-standard # sort orders. if ($nonstandard_sort_order) { @matching_rows = sort sort_rows @matching_rows; } $output_rows_since_last_heading = 0; foreach $row_index (0 .. $#matching_rows) { $row = $matching_rows[$row_index]; print "\n"; foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { print "\n"; } } print "\n"; $output_rows_since_last_heading++; if ($repeat_headings > 0 and $output_rows_since_last_heading == $repeat_headings) { if ($row_index == $#matching_rows) { print "\n"; } else { print "\n"; } foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { print "\n"; } } print "\n"; $output_rows_since_last_heading = 0; } } } print "
", html_escape($fields[$sort_position[$i]][0]), ""; if (defined $row->[$sort_position[$i]]) { if ($fields[$sort_position[$i]][1] eq "html") { print $row->[$sort_position[$i]]; } else { print html_escape($row->[$sort_position[$i]]); } } print "", html_escape($fields[$sort_position[$i]][0]), "
", html_escape($fields[$sort_position[$i]][0]), "
"; if (defined $row->[$sort_position[$i]]) { if ($fields[$sort_position[$i]][1] eq "html") { print $row->[$sort_position[$i]]; } else { print html_escape($row->[$sort_position[$i]]); } } print "
", html_escape($fields[$sort_position[$i]][0]), "
\n"; } else { print "

There are no records matching your search", " criteria.

\n"; } if (scalar @rows_with_illegal_enum_values) { print "

WARNING: The following lines in the database\n", "contain illegal enum values (and thus will never be shown): ", join(", ", @rows_with_illegal_enum_values), "

\n"; } if (scalar @rows_with_null_enum_values) { print "

WARNING: The following lines in the database\n", "contain null enum values (and thus will never be shown): ", join(", ", @rows_with_null_enum_values), "

\n"; } if (scalar @rows_with_too_many_fields) { print "

WARNING: The following lines in the database\n", "contain too many fields (the extras don't get shown): ", join(", ", @rows_with_too_many_fields), "

\n"; } } close DATA; print "

Search Form

\n"; print "

With this form, you can modify display of the database.\n"; } else { # $mode eq "gen_form" # Search form only. print "

With the form below, you can view all or portions of the\n", "database. To view the whole database in the default sort order\n", "and orientation, just hit the Search button.\n"; } # Now print the part of the instructions and search form common to both gen_form # and search_db modes. print "To view only selected records, type in text to search for. To\n", "omit fields, or to sort them in a different order, use the\n", "per-field radio buttons. To modify search or output behavior,\n", "use the controls at the bottom of the form.

\n"; print '
', "\n"; # Qualify with a gen_form_search_db "namespace" so we don't collide with a # user field called "database". print $cgi->hidden(-name=>'gen_form_search_db::database', -default=>$database_path), "\n"; print "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n"; foreach $field_number (1 .. $field_count) { print $cgi->th($field_number); } print "\n"; # Need to create this to stop radio_group() from adding redundant labels below. foreach $number (0 .. $field_count) { $blank_labels{$number} = ""; } foreach $field_number (1 .. $field_count) { $field_ref = $fields[$field_number - 1]; $field_name = $field_ref->[0]; $field_type = $field_ref->[1]; # The center alignment default is for the benefit of the radio buttons. # The other stuff overrides with ALIGN=LEFT. print ""; print "\n"; print "\n"; @radio_group = $cgi->radio_group(-name => "gen_form_search_db::$field_name sort", -values => [(0 .. $field_count)], -default => $field_number, -labels => \%blank_labels); foreach $radio_button (@radio_group) { print $cgi->td($radio_button), "\n"; } print "\n"; } print ""; print $cgi->td($cgi->submit("Search")), "\n"; print $cgi->td($cgi->reset("Reset Form")); print "
Don\'t ShowSort Order
FieldSelect
", html_escape($field_name), ""; if ($field_type eq "enum") { # This table encloses all the checkbox value columns. It has one # row and up to 3 columns, each a table with two columns and # multiple rows. print "", "\n"; @values = @{$field_ref}[2 .. $#{$field_ref}]; $value_count = @values; $rows = ceil($value_count / 3); # 3 == max columns foreach $i (0 .. 2) { # 2 == max columns - 1 $opened_innermost_table = 0; foreach $j (0 .. $rows - 1) { $index = ($i * $rows) + $j; if ($index < $value_count) { $value = $values[$index]; if (not $opened_innermost_table) { # If we haven't opened the innermost table yet, open # it now, before printing out its first row. We # have to wait till the last minute like this # because all three outer columns are not always # used (depending on how many enumerators there # are). If we always open the table before the # inner foreach and always close it afterwards, we # can end up with tables containing no rows, which # constitute illegal HTML. if ($i > 0) { # The outer table has no cellpadding or spacing, # so that everything lines up nicely, so we need # to separate the rows with non-breaking space # columns. Don't put one in front of first col. print $cgi->td("  "), "\n"; } # This table encloses the actual checkboxes and # their labels. It has two columns and multiple # rows. print "\n"; } } print "
\n"; $opened_innermost_table = 1; } print "\n"; print "', "\n", '\n"; } } if ($opened_innermost_table) { print "
', html_escape($value), "
"; } else { # $field_type eq "html" or $field_type eq "text" $field_size = $fields[$field_number - 1][2]; print $cgi->textfield(-name => $field_name, -size => $field_size); } print "
\n"; print "
\n"; print "\n"; print "\n"; print "\n"; print "\n"; # spacer print "\n"; print "\n"; # spacer print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; # spacer print "', "\n"; print "\n"; # spacer print "\n"; print "\n"; print "
Interpret search strings as:   Searches and sorts are:   Output one record per:
', "\n"; print "plain text
\n"; print "', "\n"; # Note that we have to make these radio buttons manually rather than using # radio_group() since we want to use HTML markup in the button label. # Therefore, more straightforward to do ALL the search control fields manually. # # TBD: Check what version of perl we're actually running under and # look up *that* version of the docs? (Might not always be available.) print 'Perl regular expressions', ""; print "
case-sensitive\n"; print "', "\n"; print "row
\n"; print "', "\n"; print "column"; print "
\n"; print "

\n"; print "Repeat headings every:\n"; print '\n"; } else { print " VALUE=0>\n"; } print "record(s)\n"; print "

\n"; print "
\n"; ## Footer ###################################################################### print "
\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "
"; # If a user footer was specified, put it here. if ($footer) { print $footer; } # The user-visible "Database last modified:" date is only based on the # modification timestamp of the DATA file. It shouldn't be updated just because # we, say, added extra enum values that we're not using yet to the DESC file, or # because we changed a comment in this script. @localtime = localtime $DATA_mod_time; print "Database last modified: ", (qw(January February March April May June July August September October November December))[$localtime[4]], " ", $localtime[3], ", ", 1900 + $localtime[5]; print ""; print 'Validated HTML 4.01 Transitional'; print "
\n"; print 'Generator: ' . "gen_form_search_db
\n"; ## HTML end #################################################################### print $cgi->end_html, "\n";