#!/usr/bin/perl # # gen_form_search_db # # AUTHOR: # Dan Harkless # # COPYRIGHT: # This file is Copyright (C) 2025 by Dan Harkless, and is released under the # GNU General Public License . # # DESCRIPTION: # Depending on how it is called, either displays, generates a search form for, # or searches a database in my proprietary plain-text format. # # What is my proprietary database format? Well, it's not too exotic. The # database itself (a file ending in ".data") is a standard TAB-separated # "flat-file" database. Each line corresponds to a record and each # TAB-separated string corresponds to a field. For efficiency, the database # is assumed to be already sorted in the default sort order -- we only # explicitly sort it if a non-standard sort order is chosen. Therefore, be # sure to run it through something like the UNIX sort(1) command after adding # rows. # # What makes my format interesting is the companion "description" file. If # you have a database called "books.data", you also need a description file # called "books.desc". The first line of that file is the name of the # database. The lines following that describe each field in the database. # # There are three different kinds of fields, "text", "html", and "enum". Say # the first field in the database is a "text" field. In this case, the line # following the title line in the .desc file will be the string "text", # followed by a TAB, then the name of the field. On the next line, there will # be an initial TAB followed by an integer, which tells this script how many # columns to make the text entry box for this field on the search form. A # text entry box is used because "text" fields can contain any random text. # # "html" fields are just like "text" fields except that they're not subjected # to HTML special character escaping. So if you want to use HTML tags in a # field, make it an "html" field. Otherwise, a string like # 'Douglas Adams' will be output as # that literal string, rather than as an active hyperlink. # # "enum" fields in the database, on the other hand, may contain only one of # the enumerated allowed values. enum field descriptions consist of the # string "enum", followed by a TAB, then the name of the field, as before. # Each subsequent line, however, consists of an initial TAB and then one of # the legal enumerated values (hopefully in alphabetic or some other logical # order). These enumerated values come out as individual check boxes on the # search form. # # How 'bout an example? Here's a toy "books.desc" (imagine there is no # initial "# " string on each line): # # Dan Harkless' Book Collection # enum Category # Biography # Sci-Fi # text Author(s) # 15 # text Title # 15 # # The corresponding "books.data" file would look something like this: # # Biography Lindskold, Jane M. Roger Zelazny # Sci-Fi Abbott, Edwin A. Flatland # Sci-Fi Zelazny, Roger To Die In Italbar # # Note that "To Die In Italbar" is over 15 characters. That's okay -- the # "15" only sets the width of the text entry field on the search form, but you # can type more text than that -- you just won't be able to see it all at # once. # # Besides the database field description lines, it's also possible to set # search parameter defaults in the .desc file. The names of the parameters # correspond to the NAMEs of the corresponding INPUTs (minus the # "gen_form_search_db::" prefix -- you can check out the HTML we generate to # see what I'm talking about here). For instance, the .desc file line: # # repeat_headings 10 # # will cause the "Repeat headings every: [__] record(s)" box to default to # "10" rather than the usual value of "0". # # Also, there are four parameters you can optionally set in the .desc file # that affect how the page is displayed. The first is "lang", and will # cause the URL specified to be placed in the LANG= attribute of the # tag: # # lang fr # # If "lang" is not specified, the HTML document language defaults to "en" # (English). To prevent any language from being specified, "lang" can be # specified in the .desc file with nothing but whitespace after it. # # The second parameter, "background", will cause the URL specified to be # placed in the BACKGROUND= attribute of the tag: # # background seamless_pattern_23.jpg # # (Note that'll technically result in illegal HTML, now that we're generating # HTML 5, but it's being kept for backwards compatibility. Backgrounds should # now be specified with a stylesheet in the next parameter instead.) # # The third parameter, "head_additions", consists of HTML to be added to the # section. The HTML can either be specified as one long line after # "head_additions" and a TAB: # # head_additions # # *or* as multiple successive lines after "head_additions" and a newline, then # terminated by "/head_additions" (can be omitted if this is the last # parameter in the .desc file): # # head_additions # # # # # # /head_additions # # and tags should not be added to this section since we # generate them programmatically. The head_additions lines will be put in the # <HEAD> section after the <BASE> and <META NAME=Generator> tags, and before # the <TITLE> tag. # # The fourth parameter, "footer", specifies HTML to use as the footer of the # page. However, it doesn't actually represent the entire footer, just the # user-modifiable portion. The default footer (if there is no user footer # specified) consists of an <HR> followed by a <TABLE> with "Database last # modified: <date>" left-justified on the left-hand side, and "Validated HTML # 5 + CSS" (linked to validator.w3.org and jigsaw.w3.org) and "Generator: # gen_form_search_db" (linked to the anchor of this script on my software web # page) right-justified on the right-hand side. If there's a user footer # specified, its contents will be placed on the left-hand side prior to the # database timestamp. # # This footer HTML can either be specified as one long line after "footer" and # a TAB: # # footer <A HREF="../">Up to Parent</A> # # *or* as multiple successive lines after "footer" and a newline, then # terminated by "/footer" (can be omitted if this is the last parameter in the # .desc file): # # footer # <A HREF="../"><IMG ALT="Up" HEIGHT=40 SRC="/common/up.gif" WIDTH=40></A> # <ADDRESS><A HREF="/dan/cgi/efd.pd" ID=efd>Dan Harkless</A></ADDRESS> # /footer # # Here's the above toy example repeated, but with all of these optional # parameters specified: # # Dan Harkless' Book Collection # enum Category # Biography # Sci-Fi # text Author(s) # 15 # text Title # 15 # interpret_strings regexp # case_sensitive 1 # one_record_per column # repeat_headings 10 # lang # background seamless_pattern_23.jpg # head_additions # <META CHARSET="UTF-8"> # <META NAME=viewport CONTENT="width=device-width, initial-scale=1"> # <LINK REL=apple-touch-icon HREF="/apple-touch-icon.png"> # <LINK REL=icon HREF="/favicon.ico"> # <LINK REL=stylesheet HREF="/common/stylesheet.css"> # /head_additions # footer # <A HREF="../"><IMG ALT="Up" HEIGHT=40 SRC="/common/up.gif" WIDTH=40></A> # <ADDRESS><A HREF="/dan/cgi/efd.pd" ID=efd>Dan Harkless</A></ADDRESS> # /footer # # Okay, after all this blathering, you're no doubt interested to know how to # call the script. Let's take the example of database files called # <wwwroot>/collections/music/CDs.{data,desc}, with the script located at # <wwwroot>/collections/gen_form_search_db.cgi. There are actually 4 # different ways in which you can choose to call gen_form_search_db on your # database: # # 1. http://wwwhost/collections/gen_form_search_db.cgi?music/CDs # # When passing the database path (note we don't include a trailing ".data" # or ".desc" -- those suffixes will get appended when opening the # appropriate files) as a query string (HTTP GET method), # gen_form_search_db goes into "gen_form" mode. In this mode, it merely # displays a search form. Once you submit the form (you can submit without # changing any search parameters to get the whole database in the default # sort order), the script will get called via method #2, below, and the # data will be displayed. # # 2. http://wwwhost/collections/gen_form_search_db.cgi # (POST parameters include "gen_form_search_db::database=music/CDs") # # gen_form_search_db gets called like this when the search form is # submitted. This is "search_db" mode. The POSTed parameters determine # what subset of the database data is displayed, and in what order. Below # the table of search results is another copy of the search form, so you # can try a new search without having to back up in your browser history. # # Since this mode uses POST by default, you can't link directly to # particular search results from another web page. Actually, though, the # mode can also work using HTTP GET -- this is how the validator.w3.org # link in the footer is able to function. Linking to particular search # results that way is not recommended, though, since the URL would be # invalidated next time the .desc file was updated. # # 3. http://wwwhost/collections/gen_form_search_db.cgi/collections/music/CDs # # When passing the database path (which must be absolute -- note the # repetition of "/collections" here) to the script using the so-called # "PATH_INFO" part of the URL, gen_form_search_db goes into "display_db" # mode. The entire database is displayed, followed by the search form. # # This calling style and #4 below (which also goes into "display_db" mode) # are useful when you want the entry point to your database be a display of # the data itself, rather than just a search form which requires hitting # the submit button before data will appear. # # Note that normal search engines do not do any form submissions, so if you # want your database to be indexed, you'll have to make a "display_db" mode # link to it. Most search engines also do not index pages that they think # are CGI-generated, which is why we avoid using a '?' character in the URL # in this mode and the next one. # # A sneaky feature you can use in this mode is overriding .desc-settable # search parameters by passing them in a GET query. One case where this is # definitely useful is to override a nonzero "repeat_headings" from the # .desc if you're using an external frame-generating script (like my # "frame_generator") to keep the table headings onscreen at all times. For # example: # # http://wwwhost/collections/gen_form_search_db.cgi/collections/music/CDs?g # en_form_search_db::repeat_headings=0 # # 4. http://wwwhost/collections/music/CDs/ # # As mentioned in #3 above, this URL style will also put gen_form_search_db # in "display_db" mode. Note that this method assumes a slightly different # file layout than the above examples. Rather than there being # <wwwroot>/collections/music/CDs.{data,desc} files, there are # <wwwroot>/collections/music/CDs/{data,desc} files. Also, we have # <wwwroot>/collections/music/CDs/index.cgi -- a symlink or hard link to # gen_form_search_db's real location (or a copy of it, if you're # unfortunate enough not to be able to make links on your system). # # To get this method to work, you need some cooperation from your # webserver. On Apache, you need to make sure this directory has "Options # FollowSymLinks" (assuming you're not using a hard link or copy), plus the # server's "DirectoryIndex" directive will need to include "index.cgi", # and "AddHandler cgi-script .cgi" will need to be uncommented. # # If you want to use the implicit "./data" and "./desc" database file # locations but you don't want to have "index.cgi" in your "DirectoryIndex" # list for some reason, you can mention the script name explicitly, as in: # # http://wwwhost/collections/music/CDs/gen_form_search_db.cgi # # That will still require you to have a gen_form_search_db.cgi link or copy # in each directory containing a "data" / "desc" file pair, of course. # # As in the PATH_INFO version of display_db mode, you can override .desc- # set search parameters if you need to: # # http://wwwhost/collections/music/CDs/?gen_form_search_db::repeat_headings # =0 # # For examples of this script in action, please see the links in the # gen_form_search_db description on my Software page (URL at the top of the # file). # # VERSION: $version = "2025-08-07"; # # DATE MODIFICATION # ========== ================================================================== # 2025-08-07 Added "padding: 2px" to TABLE.bordered TD & TH (& updated below). # 2025-08-03 We now generate HTML 5, rather than 4.01 Transitional. If the # user doesn't specify a stylesheet in the head_additions field, we # output the following inline mini-stylesheet: # # :root { # background-color: #151515; # color: white; # } # A:link {color: dodgerblue} # A:visited {color: mediumpurple} # A:active {color: crimson} # TABLE {border-collapse: collapse} # TABLE.bordered {border: 1px solid} # TABLE.bordered TD {border: 1px solid; padding: 2px} # TABLE.bordered TH {border: 1px solid; padding: 2px} # TD.left {text-align: left} # TD.right {text-align: right} # TH.left {text-align: left} # TH.middle {vertical-align: middle} # TR.bottom {vertical-align: bottom} # TR.center {text-align: center} # TR.middle {vertical-align: middle} # TR.top {vertical-align: top} # # If a stylesheet _is_ specified, it must contain compatible # definitions for the above class names (doesn't need to match the # above styling _exactly_). Mostly for HTML compactness, I decided # not to avoid potential name conflict issues by either putting # image_album in all the class names, or doing all the above with # inline STYLE= attributes. We also now default to UTF-8 rather # than ISO-8859-1. A copy of the last HTML 4.01 Transitional # version (2016-07-04) has been saved at <https://harkless.org/dan/s # oftware/old/gen_form_search_db.HTML4>. # 2016-07-04 No longer require if.pm to do 'use warnings' only for Perl 5.6.0+. # 'use warnings' has been legal in up-to-date 'perl's since 2000. # 2016-07-03 Although 'lang', if specified, comes from a user-controlled file, # html_escape() it just in case. # 2016-06-29 Added 'lang' parameter to 'desc' file; default to "en" without it. # 2016-03-23 Changed 'favicon' specification in the 'desc' file to a general- # purpose 'head_additions' section, terminated by '/head_additions' # (if multi-line). 'favicon' is actually still supported, but is # now an undocumented option. Changed multi-line 'footer' to # optionally be terminated by '/footer' rather than EOF. # 2016-03-23 Changed 'not defined @param_vals' to 'not @param_vals' per # deprecation warning in Perl 5.16.*. # 2008-12-31 Having stderr redirected to stdout resulted in 2 XSS holes from a # default Perl error if someone followed a link with a non-numeric # value stuck into repeat_headings or one of the "sort" parameters. # Fixed those, but then came across another XSS hole that occurred # when Perl output a regexp syntax error (e.g. if a regular # expression started with a '?' character), so changed the script to # use CGI::Carp rather than stderr redirection to stdout, for safe # catching and browser output of all fatal errors. Also modified # the default die() output so that the path to the script on the # server is not revealed in the error output to the browser -- only # in the stderr (Apache error_log) version. Due to the design of # CGI::Carp, non-fatal errors now only show up in the error_log and # as HTML comments. # 2008-09-02 "use English qw(-no_match_vars)": avoid regex performance penalty. # 2006-12-20 Rearranged code so we could put out the newest of the modification # times of the 'data' file, the 'desc' file, and this script as the # Last-Modified: header. This, plus Apache's automatic handling of # If-Modified-Since:, will result in cross-session cacheability and # bandwidth savings. The user-visible "Database last modified:" # date in the footer remains based solely on the modification time # of the 'data' file. # 2006-12-20 Use the new 'use warnings' rather than -w on the shebang line so # we only output warnings in this script, not in modules we use. # To do this in a backwards-compatible way, the if.pm module # (available from CPAN) is now required. # 2006-12-11 Changed the hardcoded HTML type from 3.2 to 4.01 Transitional to # accomodate my new footers that use JavaScript-obfuscated mailto: # links. TBD: Make the doctype a parameter in the desc file? # 2006-04-07 To minimize unnecessary filesystem I/O and consolidate all the # settings into one place, removed support for the background.url # and footer.html files in favor of 'background' and 'footer' params # in the .desc file. Also added a 'favicon' parameter. # 2006-02-16 Removed \n from Generator -- Firefox displays it as a funky glyph. # 2005-05-19 perldoc.com has been down for a while, and may not come back up. # Changed http://www.perldoc.com/latest/pod/perlre.html link to # http://perldoc.perl.org/perlre.html. # 2003-06-29 Output the time as of a minute ago as the Last-Modified header. # 2003-06-29 Allowed override of .desc settings (e.g. to overcome nonzero # repeat_headings when using frames) using GET in display_db mode. # 2003-06-29 Added display_db mode, triggered via a database specified either # implicitly or using PATH_INFO. Can be spidered by search engines. # 2003-06-28 Search results page now duplicates the search form at the bottom, # with the fields filled in per the search that was just done. # 2003-06-28 Removed the UP_URL{<url>} processing in the footer, since after # my upcoming changes, the search form will no longer logically be # the parent of a given results page. Multiple consecutive searches # should be thought of as lateral motion, with the up arrow allowing # you to go directly to the parent page that called # gen_form_search_db, without having to hit Back multiple times. # 2003-06-28 Added ability to change search parameter defaults in the .desc. # 2003-03-15 Use CGI.pm's DISABLE_UPLOADS and POST_MAX variables to protect # against DoS attacks. POSTs of more than 1 MiB will error. # 2003-03-10 Using $cgi->td() on database field name "Copy & paste" was causing # double-HTML-escaping and an error. Now rather than calling html_ # escape() when we read the .desc file, we just call it on output, # if needed (not needed if output via CGI.pm's routines, like td()). # 2003-02-10 Now warn if a line in the database contains too many fields. # 2003-01-01 Added "Repeat headings every: <input> records" feature. # 2002-12-17 Now warn about illegal enum values as well as null ones. # 2002-12-06 Instructions at the top of the form needed to be updated to # reflect the recently-added capabilities. Also moved $version # setting to up above to help prevent forgetting to update it. # 2002-11-22 Made the "case-sensitive" checkbox apply not just to the search # strings, but also to database sorts. # 2002-11-21 Added a new field type, "html", which is just like "text" except # that special HTML characters don't get escaped on output (e.g. # '<' and '>' don't get output as "<" and ">"). # 2002-11-21 Added a new radio button to the search form, "Output one record # per: (O) row ( ) column". It does what it says. # 2002-11-21 That still wasn't enough to generate legal HTML 3.2. CGI.pm 2.69 # through 2.88 unconditionally generate a lang= attribute on the # <html> tag, and there's no such thing in HTML 3.2. Patched my # copy of CGI.pm to not generate the attribute if -lang=>"" is # passed to start_html(). Sent the patch to Lincoln Stein, so # hopefully it'll be present in 2.89 and on. # 2002-11-21 CGI.pm versions 2.69 and above require the new -no_xhtml pragma or # validator.w3.org will not validate the output as HTML 3.2. # 2002-11-21 Need to check in sort_rows() if the fields are defined before # comparing them. For instance, if there's no data for the last # field in a record, the database owner might forget a trailing TAB. # 2002-02-27 The actual searching and sorting features were TBDs until now -- # the entire database was always displayed, in default sort order. # 2002-02-22 Now instead of blindly cat'ing the footer.html file, process it, # transmogrifying the string UP_URL{<url>} as per above description. # 2002-02-18 Previously only worked with relative paths to the database, but # that kind of sucks for people forced to use a global /cgi-bin dir # rather than being able to put the script near the database. Got # absolute paths working. While doing this, removed taint checking, # since it wasn't doing anything useful -- just restricting the # characters usable in pathnames (we don't use any subshells, so # there isn't that concern). # 2002-02-18 Use $OS_ERROR rather than rolling our own dir/file access errors. # 2002-02-16 Added "charset=ISO-8859-1" to the HTTP header to eliminate # validator.w3.org warning. If that ever causes a problem for # someone who wants to use this script with a different character # set, I'll add a way to customize it via file or environment. # 2002-02-16 Added "Validated HTML 3.2" and "Generator: gen_form_search_db" # links right-justified in footer; added live example link above. # 2000-06-01 No longer consider .cgi part of the name of the script. In some # installations it might have a different extension or none at all. # 2000-05-05 In the past, CGI.pm didn't initialize $cgi->keywords if no # arguments were specified in the searchpart of the URL (as during a # POST), which would result in a warning when diagnostics were # turned on. Therefore, if we wanted warnings active, we needed to # `use diagnostics' and surround the `@keyword = $cgi->keywords' # with `disable diagnostics' and `enable diagnostics'. # Unfortunately perl v5.6.0 has a bug that produces a diagnostic # when you say `disable diagnostics'. Luckily, the $cgi->keywords # warning no longer seems to appear. We could just leave # `use diagnostics' and get rid of the disable/enable, but a Usenet # thread alerted me to the fact that `use diagnostics' entails a # great startup delay, and I verified this experimentally. # Therefore, just backing off to -w on the shebang line. The # `use diagnostics' below can be uncommented for verbose warnings # when debugging. # 1999-07-07 Use $cgi->url() instead of hardcoding script name in FORM ACTION. # 1999-04-16 Merged generate_search_form and search_database into # gen_form_search_db to eliminate duplicated code w/o making pm. # 1999-03-03 Use latest CGI.pm's new -dtd option to say that we're HTML 3.2. # 1998-07-07 Output Generator META tag with name, version, and author of this # script. Be sure to update the META version below when revising! # 1997-11-25 Original (generate_search_form and search_database). ## Modules used ################################################################ use English qw(-no_match_vars); # allow use of names like @ARG rather than @_ use CGI qw(-no_xhtml); # need -no_xhtml for CGI.pm 2.69+ or get bad HTML verif. use CGI::Carp qw(fatalsToBrowser set_message warningsToBrowser); # safe errors BEGIN { sub CGI_Carp_handler { # Strip off the leading path from the script name. ($program_name_no_path = $PROGRAM_NAME) =~ s<.*/><>; # Don't include the pathname on the server of this script in the version # of the error that we display to the user (the version going to stderr # will still have it). $error = shift; if ($error =~ m<^(.*? )(/.*?)( line \d+\.)>) { $error = $1 . $program_name_no_path . $3; } if (not defined $cgi) { $cgi = new CGI; } print $cgi->h1("ERROR from $program_name_no_path:"), "\n"; print $error, "\n"; # TBD: Allow for an optional message on how to contact the webmaster? } set_message(\&CGI_Carp_handler); } use File::Basename; # for fileparse() use POSIX; # for ceil() use warnings; # output warnings for this script but not for modules used ## Subroutines ################################################################# sub html_escape { # This is a copy & paste of CGI.pm's internal escapeHTML() routine except # without the here-meaningless 'dontescape' checking. my $toencode = shift; return undef unless defined($toencode); $toencode=~s/&/&/g; $toencode=~s/\"/"/g; # would have to be '"' if HTML 3.2 were used $toencode=~s/>/>/g; $toencode=~s/</</g; return $toencode; } sub sort_rows { my $i; foreach $i (1 .. $field_count) { if (defined $sort_position[$i] and defined $a->[$sort_position[$i]] and defined $b->[$sort_position[$i]]) { my $string_a = $a->[$sort_position[$i]]; my $string_b = $b->[$sort_position[$i]]; if (not $case_sensitive) { $string_a = lc($string_a); $string_b = lc($string_b); } # TBD: Strip out the HTML markup when sorting so that '<A # HREF="Z">A</A>' and '<A HREF="Y">B</A>' will properly sort (A, B) # rather than (B, A). As it is now, we need to format the html data # values like 'B (<A HREF="Y">URL</A>)' for correct sorting. The # problem is that even if we implemented this, the sort-lines # command in Emacs would no longer produce the expected results, and # trying to visually determine whether a line was sorted correctly # would be difficult, so we'd pretty much need to implement a DB # editor more specialized than a text editor (but without losing any # of the advantages of using a text editor on the server -- thus it # would need to be a TTY or web application) to be able to take # advantage of this. # if ($fields[$i][1] eq "html") { # # } $potential_result = $string_a cmp $string_b; if ($potential_result != 0) { return $potential_result; } } } return 0; } sub url_escape { # This is a copy & paste of CGI.pm's internal escape() routine, except that # that routine was overly restrictive -- it disallowed [!~*'()] (presumably # because they're shell-special characters) even though they're perfectly # legal to appear unquoted in URLs. I've also caused '/' not to be quoted # since the only way we're going to get it in the URL and have it not mean # the normal URL path separate character is for it to be in the query part # of the URL, where it's legal to appear unescaped. Besides, # ?books-have/fiction reads better than ?books-have%2Ffiction. my $toencode = shift; return undef unless defined($toencode); $toencode =~ s<([^a-zA-Z0-9_.\-!~*\'()/])><uc sprintf("%%%02x",ord($1))>eg; return $toencode; } ## Initializations ############################################################# $OUTPUT_AUTOFLUSH = 1; # let the browser render as dynamically as possible $CGI::DISABLE_UPLOADS = 1; $CGI::POST_MAX = 1048576; # 1 MiB $cgi = new CGI; if ($cgi->cgi_error()) { # Surprisingly, current browsers (as of 2003) don't respond properly to HTTP # status code 413, so we'll generate an HTML error page rather than calling # $cgi->header($cgi->cgi_error()). die($cgi->cgi_error(), " Error occurred"); } $head_start = "<HEAD>\n" . "<META NAME=Generator CONTENT=\n" . ' "gen_form_search_db version ' . $version . ', by Dan Harkless' . ' -- http://harkless.org/dan/software/">' . "\n"; $lang = "en"; # First override opportunity for these is while we're reading the .desc file. # Second is if we're searching the database and corresponding param. was posted. $case_sensitive = 0; $interpret_strings = "plain"; $one_record_per = "row"; $repeat_headings = 0; ## Required Parameters ######################################################### @keyword = $cgi->keywords; # get searchpart of URL -- won't grab "<var>=<val>" $path_info = $cgi->path_info; # Figure out which mode we're in and get the name of the database. if (@keyword > 0) { # We were called with a database in the searchpart of the URL, so we're in # generate search form mode. $mode = "gen_form"; $database_path = $keyword[0]; } elsif ($path_info) { # We were called with the database in the additional path info part of the # URL, so we're in display database mode. $mode = "display_db"; $database_path = $path_info; } else { # No database in the URL. if ($cgi->param('gen_form_search_db::database')) { # We're being called (presumably by ourselves) in search database mode. $mode = "search_db"; $database_path = $cgi->param('gen_form_search_db::database'); } elsif (-e "data") { # We're implicitly using "data" in the current directory in display # database mode. TBD: Glob for *data so the file can be implicitly # selected yet still be called <name>.data? An apparent 5.8.0 globbing # bug made this fail when I started to implement it. Can work around # with readdir(), but... $mode = "display_db"; $database_path = "./"; # dummy path } else { die("Database name not provided in URL or", " gen_form_search_db::database POST", ' parameter, and there is no "data" file', " in the current directory. Error occurred"); } } if ($database_path eq "./") { $data_file = "data"; $desc_file = "desc"; } ($database_name, $database_dir) = fileparse $database_path; if (not defined $data_file) { $data_file = "$database_name.data"; $desc_file = "$database_name.desc"; } $chdir_dir = $database_dir; if ($database_dir ne "./") { $base = $cgi->url; if ($database_dir =~ m<^/>) { # Absolute dir specified -- remove all after "protocol://<hostname>" for # base URL. $base =~ s<^([^/]+//[^/]+).*$><$1>; $document_root = $ENV{DOCUMENT_ROOT}; if (not defined $document_root) { die("Absolute directory specified yet webserver" . " doesn't set \$ENV{DOCUMENT_ROOT}. Error occurred"); } $chdir_dir = "$document_root/$database_dir"; } else { # Relative dir specified -- for the base, just remove our script name # and any PATH_INFO and let the webserver interpret any '../'s that get # appended to resulting URL. $base =~ s<$path_info$><>; $base =~ s</[^/]+$></>; } # We need to pretend this document was fetched from the specified database # directory so that relative links in the customizable .html and .url files # will work. $base .= $database_dir; # this ends with a '/' # TBD: Can't url_escape() $base since it'll change "http://" to "http%3A//": $head_start .= "<BASE HREF=\"$base\">\n"; } ## Input files ################################################################# if (not chdir($chdir_dir)) { die('"', html_escape($database_dir), '": ', html_escape($OS_ERROR), ". Error occurred"); } elsif (not open(DATA, $data_file)) { die('"', html_escape($data_file), '": ', html_escape($OS_ERROR), ". Error occurred"); } elsif (not open(DESC, $desc_file)) { die('"', html_escape($desc_file), '": ', html_escape($OS_ERROR), ". Error occurred"); } else { # Get database modification time. @file_stat = stat DATA; $DATA_mod_time = $file_stat[9]; if ($mode eq "gen_form") { # When just displaying the search form, we're done with the db by this # point. close DATA; } # Look at the modification time of the DESC file and this script itself. # The newest of those two and the DATA file will be output as the # Last-Modified: header. @file_stat = stat DESC; $DESC_mod_time = $file_stat[9]; @file_stat = stat $PROGRAM_NAME; # TBD: handle possible failure here? $script_mod_time = $file_stat[9]; # Get title. chomp($database_title = <DESC>); $title = $database_title; if ($mode eq "search_db") { $title .= ": Search Results"; } # Get field descriptions. while (<DESC>) { ($field_type, $field_name_or_value, $newline) = split /[\n\t]/; if ($continuing_enum and $field_type) { # The previous line was the last value for that enum. $fields[$field_count++] = [$continuing_enum, "enum", @enum_values]; undef $continuing_enum; undef @enum_values; } if (not $field_type) { push @enum_values, $field_name_or_value; } elsif ($field_type eq "enum") { $continuing_enum = $field_name_or_value; } elsif ($field_type eq "html" or $field_type eq "text") { ($field_type_next, $field_size, $newline) = split /[\n\t]/, <DESC>; if ($field_type_next) { die("'$field_type' field '", html_escape($field_name_or_value), "' missing size limit. Error occurred"); } $fields[$field_count++] = [$field_name_or_value, $field_type, $field_size]; } elsif ($field_type eq "case_sensitive") { $case_sensitive = $field_name_or_value; } elsif ($field_type eq "interpret_strings") { $interpret_strings = $field_name_or_value; } elsif ($field_type eq "one_record_per") { $one_record_per = $field_name_or_value; } elsif ($field_type eq "repeat_headings") { $repeat_headings = $field_name_or_value; } elsif ($field_type eq "background") { $background = $field_name_or_value; } elsif ($field_type eq "lang") { $lang = html_escape($field_name_or_value); } elsif ($field_type eq "favicon") { $head_start .= '<LINK REL=icon HREF="' . html_escape($field_name_or_value) . "\">\n"; } elsif ($field_type eq "footer") { $footer = ""; if ($field_name_or_value) { $footer = "$field_name_or_value\n"; } while (<DESC>) { if ($ARG =~ m(^/footer)) { last; } else { $footer .= $ARG; } } } elsif ($field_type eq "head_additions") { $head_additions = ""; if ($field_name_or_value) { $head_additions = "$field_name_or_value\n"; } while (<DESC>) { if ($ARG =~ m(^/head_additions)) { last; } else { $head_additions .= $ARG; if ($ARG =~ /link.*rel.*=.*stylesheet/i) { $had_a_stylesheet = 1; } } } } else { die("Unknown field type '", html_escape($field_type), "'. Error occurred"); } } if ($continuing_enum) { $fields[$field_count++] = [$continuing_enum, "enum", @enum_values]; } close DESC; } ## HTTP header ################################################################# # For the Last-Modified: header, output the newest of the modification # timestamps of the DATA file, the DESC file, and this script. This will allow # caching of our results (when called via GET, at least -- putting out # Last-Modified: for POST results apparently doesn't allow caching but doesn't # seem to cause any harm, either). Note that if you have to upgrade CGI.pm (or # another module) due to a bug, the Last-Modified: header won't reflect this and # clients may still see the old buggy output. You can simply 'touch' this # script in such cases to work around this. # # Surprisingly, we don't need to do our own checking for the If-Modified-Since: # header (or related) and outputting of 304 Not Modified. We can just output # our status 200 as usual, along with the semi-static Last-Modified: timestamp, # and Apache (at least as of 2.0.52 plus Red Hat AS 4 backported patches) will # change that to a 304, when appropriate, for us. # # TBD: Output ETag headers too? Would this require us to avoid colliding with # Apache's own ETag values? $mod_time = $script_mod_time; if ($DATA_mod_time > $mod_time) { $mod_time = $DATA_mod_time; } if ($DESC_mod_time > $mod_time) { $mod_time = $DESC_mod_time; } # Note we can't use POSIX::strftime() because it'll output an illegal date if # the default locale the web server's running under isn't English. use vars qw($yday $isdst); # eliminate warnings ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = gmtime($mod_time); @MON = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); @WDAY = qw(Sun Mon Tue Wed Thu Fri Sat); $Last_Modified = sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT", $WDAY[$wday], $mday, $MON[$mon], $year + 1900, $hour, $min, $sec); print $cgi->header(-charset => "UTF-8", -Last_Modified => $Last_Modified, -type => "text/html"); ## HTML start ################################################################## print "<!DOCTYPE HTML>\n"; print "<HTML"; if ($lang =~ /\S+/) { print " LANG=\"$lang\""; } print ">\n"; print $head_start; if (defined $head_additions) { print $head_additions; } print '<TITLE>' . html_escape($title) . "\n"; if (not $had_a_stylesheet) { print "\n"; } print "\n"; ## Body ######################################################################## print "\n"; print $cgi->h1($database_title), "\n"; if ($mode eq "display_db" or $mode eq "search_db") { # Search / display the database. if ($mode eq "search_db") { print "

Search Results

\n"; } # Get the non-database-specific search parameters. If we're in search_db # mode, we overwrite $case_sensitive etc. unconditionally, so that when # those parameters are changed from the .desc defaults, the overriding will # happen correctly. In the display_db case we only overwrite # $case_sensitive etc. if they are explicitly set (e.g. someone passed # "gen_form_search_db::one_record_per=0" in the query section of the URL, # perhaps because they were using an external frame-generator script to # achieve always-onscreen column headings). if ($mode eq "search_db" or defined $cgi->param("gen_form_search_db::case_sensitive")) { $case_sensitive = $cgi->param("gen_form_search_db::case_sensitive"); } if ($mode eq "search_db" or defined $cgi->param("gen_form_search_db::interpret_strings")) { $interpret_strings=$cgi->param("gen_form_search_db::interpret_strings"); } if ($mode eq "search_db" or defined $cgi->param("gen_form_search_db::one_record_per")) { $one_record_per = $cgi->param("gen_form_search_db::one_record_per"); } if ($mode eq "search_db" or defined $cgi->param("gen_form_search_db::repeat_headings")) { $repeat_headings = $cgi->param("gen_form_search_db::repeat_headings"); if ($repeat_headings !~ /^\d+$/) { print "

ERROR: Non-numeric value for repeat_headings", " (", html_escape($repeat_headings), ").

\n"; $doomed_to_failure = 1; # technically, we could continue... } } # Get the database-specific search parameters. foreach $i (0 .. $field_count - 1) { undef @param_vals; # assigning undef to defined var doesn't undef it if ($mode eq "search_db") { @param_vals = $cgi->param($fields[$i][0]); } else { # $mode eq "display_db" if ($fields[$i][1] eq "enum") { @param_vals = @{$fields[$i]}[2 .. $#{$fields[$i]}]; } else { @param_vals = (""); # simulate empty text entry field } } if ($fields[$i][1] eq "enum" and not @param_vals) { print "

ERROR: You unchecked all possible values for\n", "the \"", html_escape($fields[$i][0]), "\" field, which will always result in no\n", "match. You need to leave at least one value checked.

\n"; $doomed_to_failure = 1; } else { $parameters[$i] = [@param_vals]; $sort_order_param_name = "gen_form_search_db::" . $fields[$i][0] . " sort"; if ($mode eq "search_db") { $this_field_sort_order = $cgi->param($sort_order_param_name); } else { # $mode eq "display_db" $this_field_sort_order = $i + 1; } if (not defined $this_field_sort_order) { print "

ERROR: ", html_escape($sort_order_param_name), "\n", "parameter not specified.

\n"; $doomed_to_failure = 1; } elsif ($this_field_sort_order !~ /^\d+$/) { print "

ERROR: Non-numeric value for ", html_escape($sort_order_param_name), " (", html_escape($this_field_sort_order), ").

\n"; $doomed_to_failure = 1; } else { if ($this_field_sort_order != 0) { $at_least_one_nonzero_sort = 1; if (defined $sort_position[$this_field_sort_order]) { print "

ERROR: Multiple fields given sort\n", "order $this_field_sort_order. Only one field per\n", "sort position, please.

\n"; $doomed_to_failure = 1; } } $sort_position[$this_field_sort_order] = $i; } } } if (not $at_least_one_nonzero_sort) { print "

ERROR: \"Don't Show\" was selected for all fields,\n", "which of course will result in no output.

\n"; $doomed_to_failure = 1; } if (not $doomed_to_failure) { if ($case_sensitive) { $match_options = ""; } else { $match_options = "(?i)"; } $row_number = 0; while () { $row_number++; @row_elements = split /[\n\t]/; if (scalar(@row_elements) > $field_count) { push @rows_with_too_many_fields, $row_number; } # See if this record matches (or, rather, fails to match). $match = 1; foreach $i (0 .. $field_count - 1) { if ($fields[$i][1] eq "enum") { if (not defined $row_elements[$i] or $row_elements[$i] eq "") { # Enum value is null, which is illegal. push @rows_with_null_enum_values, $row_number; $match = 0; last; } else { $matched_enum_val = 0; foreach $requested_enum_val (@{$parameters[$i]}) { if ($requested_enum_val eq $row_elements[$i]) { $matched_enum_val = 1; last; } } if (not $matched_enum_val) { $match = 0; $is_a_legal_value = 0; foreach $legal_value (@{$fields[$i]} [2 .. $#{$fields[$i]}]) { if ($row_elements[$i] eq $legal_value) { $is_a_legal_value = 1; last; } } if (not $is_a_legal_value) { push @rows_with_illegal_enum_values, $row_number; } last; } } } else { # $fields[$i][1] eq "html" or $fields[$i][1] eq "text" if ($parameters[$i][0] ne "") { # The user entered some text in this field -- it's not a # "don't care" item. if (not defined $row_elements[$i]) { # A regexp can match a null value, so if there is no # data in this element, we need to use the null # string. $row_element = ""; } else { $row_element = $row_elements[$i]; } if ($interpret_strings eq "plain") { $match_string = quotemeta($parameters[$i][0]); } else { $match_string = $parameters[$i][0]; } # TBD: Would be good to use eval to catch user regexp # errors here and make sure they get properly # HTML-formatted, but I believe eval can be a security # hole when it contains an arbitrary user input, as # regexps must be. Of course without doing HTML # escaping, cross-site scripting attacks become # possible, but gen_form_search_db is unlikely to be # used on sites that utilize cookies, and search results # pages are not designed to be linked to using GET # queries. if (not $row_element =~ m/$match_options$match_string/){ $match = 0; last; } } } } if ($match) { push @matching_rows, [@row_elements]; } } if (scalar @matching_rows) { print "\n"; if ($one_record_per eq "column") { # Originally we only supported one record per row, so the # variables are named accordingly. Now we have this optional # one-record-per-column view, though. $nonstandard_sort_order = 0; undef $previous_sort_position; foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { if (defined $previous_sort_position and $previous_sort_position != $sort_position[$i] - 1) { $nonstandard_sort_order = 1; } $previous_sort_position = $sort_position[$i]; } } # For efficiency, we assume the database is already in the # default sort order, so we only need to sort for non-standard # sort orders. if ($nonstandard_sort_order) { @matching_rows = sort sort_rows @matching_rows; } foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { $output_rows_since_last_heading = 0; print "\n"; print "\n"; foreach $row (@matching_rows) { print "\n"; $output_rows_since_last_heading++; if ($repeat_headings > 0 and $output_rows_since_last_heading == $repeat_headings) { print "\n"; $output_rows_since_last_heading = 0; } } print "\n"; } } } else { # The normal case of one record per row. print "\n"; $nonstandard_sort_order = 0; undef $previous_sort_position; foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { print "\n"; if (defined $previous_sort_position and $previous_sort_position != $sort_position[$i] - 1) { $nonstandard_sort_order = 1; } $previous_sort_position = $sort_position[$i]; } } print "\n"; # For efficiency, we assume the database is already in the # default sort order, so we only need to sort for non-standard # sort orders. if ($nonstandard_sort_order) { @matching_rows = sort sort_rows @matching_rows; } $output_rows_since_last_heading = 0; foreach $row_index (0 .. $#matching_rows) { $row = $matching_rows[$row_index]; print "\n"; foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { print "\n"; } } print "\n"; $output_rows_since_last_heading++; if ($repeat_headings > 0 and $output_rows_since_last_heading == $repeat_headings) { if ($row_index == $#matching_rows) { print "\n"; } else { print "\n"; } foreach $i (1 .. $field_count) { if (defined $sort_position[$i]) { print "\n"; } } print "\n"; $output_rows_since_last_heading = 0; } } } print "
", html_escape($fields[$sort_position[$i]][0]), ""; if (defined $row->[$sort_position[$i]]) { if ($fields[$sort_position[$i]][1] eq "html") { print $row->[$sort_position[$i]]; } else { print html_escape($row->[$sort_position[$i]]); } } print "", html_escape($fields[$sort_position[$i]][0]), "
", html_escape($fields[$sort_position[$i]][0]), "
"; if (defined $row->[$sort_position[$i]]) { if ($fields[$sort_position[$i]][1] eq "html") { print $row->[$sort_position[$i]]; } else { print html_escape($row->[$sort_position[$i]]); } } print "
", html_escape($fields[$sort_position[$i]][0]), "
\n"; } else { print "

There are no records matching your search", " criteria.

\n"; } if (scalar @rows_with_illegal_enum_values) { print "

WARNING: The following lines in the database\n", "contain illegal enum values (and thus will never be shown): ", join(", ", @rows_with_illegal_enum_values), "

\n"; } if (scalar @rows_with_null_enum_values) { print "

WARNING: The following lines in the database\n", "contain null enum values (and thus will never be shown): ", join(", ", @rows_with_null_enum_values), "

\n"; } if (scalar @rows_with_too_many_fields) { print "

WARNING: The following lines in the database\n", "contain too many fields (the extras don't get shown): ", join(", ", @rows_with_too_many_fields), "

\n"; } } close DATA; print "

Search Form

\n"; print "

With this form, you can modify display of the database.\n"; } else { # $mode eq "gen_form" # Search form only. print "

With the form below, you can view all or portions of the\n", "database. To view the whole database in the default sort order\n", "and orientation, just hit the Search button.\n"; } # Now print the part of the instructions and search form common to both gen_form # and search_db modes. print "To view only selected records, type in text to search for. To\n", "omit fields, or to sort them in a different order, use the\n", "per-field radio buttons. To modify search or output behavior,\n", "use the controls at the bottom of the form.

\n"; print '
', "\n"; # Qualify with a gen_form_search_db "namespace" so we don't collide with a # user field called "database". print $cgi->hidden(-name=>'gen_form_search_db::database', -default=>$database_path), "\n"; print "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n"; foreach $field_number (1 .. $field_count) { print $cgi->th($field_number); } print "\n"; # Need to create this to stop radio_group() from adding redundant labels below. foreach $number (0 .. $field_count) { $blank_labels{$number} = ""; } foreach $field_number (1 .. $field_count) { $field_ref = $fields[$field_number - 1]; $field_name = $field_ref->[0]; $field_type = $field_ref->[1]; # The center alignment default is for the benefit of the radio buttons. # The other stuff overrides with CLASS=left. print ""; print "\n"; print "\n"; @radio_group = $cgi->radio_group(-name => "gen_form_search_db::$field_name sort", -values => [(0 .. $field_count)], -default => $field_number, -labels => \%blank_labels); foreach $radio_button (@radio_group) { print $cgi->td($radio_button), "\n"; } print "\n"; } print ""; print $cgi->td($cgi->submit("Search")), "\n"; print "\n"; print "
Don\'t ShowSort Order
FieldSelect
", html_escape($field_name), ""; if ($field_type eq "enum") { # This table encloses all the checkbox value columns. It has one # row and up to 3 columns, each a table with two columns and # multiple rows. print "", "\n"; @values = @{$field_ref}[2 .. $#{$field_ref}]; $value_count = @values; $rows = ceil($value_count / 3); # 3 == max columns foreach $i (0 .. 2) { # 2 == max columns - 1 $opened_innermost_table = 0; foreach $j (0 .. $rows - 1) { $index = ($i * $rows) + $j; if ($index < $value_count) { $value = $values[$index]; if (not $opened_innermost_table) { # If we haven't opened the innermost table yet, open # it now, before printing out its first row. We # have to wait till the last minute like this # because all three outer columns are not always # used (depending on how many enumerators there # are). If we always open the table before the # inner foreach and always close it afterwards, we # can end up with tables containing no rows, which # constitute illegal HTML. if ($i > 0) { # The outer table has no cellpadding or spacing, # so that everything lines up nicely, so we need # to separate the rows with non-breaking space # columns. Don't put one in front of first col. print $cgi->td("  "), "\n"; } # This table encloses the actual checkboxes and # their labels. It has two columns and multiple # rows. print "\n"; } } print "
\n"; $opened_innermost_table = 1; } print "\n"; print "', "\n", '\n"; } } if ($opened_innermost_table) { print "
', html_escape($value), "
"; } else { # $field_type eq "html" or $field_type eq "text" $field_size = $fields[$field_number - 1][2]; print $cgi->textfield(-name => $field_name, -size => $field_size); } print "
", $cgi->reset("Reset Form"), "
\n"; print "
\n"; print "\n"; print "\n"; print "\n"; print "\n"; # spacer print "\n"; print "\n"; # spacer print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; # spacer print "', "\n"; print "\n"; # spacer print "\n"; print "\n"; print "
Interpret search strings as:   Searches and sorts are:   Output one record per:
', "\n"; print "plain text
\n"; print "', "\n"; # Note that we have to make these radio buttons manually rather than using # radio_group() since we want to use HTML markup in the button label. # Therefore, more straightforward to do ALL the search control fields manually. # # TBD: Check what version of perl we're actually running under and # look up *that* version of the docs? (Might not always be available.) print 'Perl regular expressions', ""; print "
case-sensitive\n"; print "', "\n"; print "row
\n"; print "', "\n"; print "column"; print "
\n"; print "

\n"; print "Repeat headings every:\n"; print '\n"; } else { print " VALUE=0>\n"; } print "record(s)\n"; print "

\n"; print "
\n"; ## Footer ###################################################################### print "
\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "
"; # If a user footer was specified, put it here. if ($footer) { print $footer; } # The user-visible "Database last modified:" date is only based on the # modification timestamp of the DATA file. It shouldn't be updated just because # we, say, added extra enum values that we're not using yet to the DESC file, or # because we changed a comment in this script. @localtime = localtime $DATA_mod_time; print "Database last modified: ", (qw(January February March April May June July August September October November December))[$localtime[4]], " ", $localtime[3], ", ", 1900 + $localtime[5]; print ""; $self_url = url_escape($cgi->self_url); # TBD: doesn't escape '?' print 'Validated HTML 5 + ", 'CSS
\n"; print 'Generator: ' . "gen_form_search_db
\n"; warningsToBrowser(1); # output any warnings that have been saved up as comments ## HTML end #################################################################### print $cgi->end_html, "\n";