#!/bin/sh -- # A comment mentioning perl, to prevent perl from looping.
# Indented to work with bash.
eval 'exec perl -S $0 ${1+"$@"}'
    if 0;

$Perl = '#!/usr/bin/perl';

# Get program name, usually "fixer".  Done with a substitution rather than
# `basename $0` so that no shell is run on an unquoted path.
($program = $0) =~ s#^.*/##;

# Must run before the Usage/Syntax strings are built: it sets
# $Compression_Usage and $Example, which are interpolated below.
&initialize_global_variables();

# Make Usage statement:
$version_num = '1.1';
$Version = "$program: Version $version_num Copyright (c) 1994,1996 Karl J. Runge";

$Usage = <<"EOQ";
$Version

A script to:

        1) Grab word/variable values from files to insert into Formats.
        2) Print out lines matching a given Pattern.
        3) Remove and/or Substitute given strings from output.
        4) Print out lines occurring in the Section between given strings.
        5) Print out the N-line Section occurring after a given string.
        6) Do items 1-3 inside a Section specified by 4 & 5.

Usage:  $program [<options>] file1 file2 ... fileN

  or if files match a shell wildcard, e.g.,:

        $program [<options>] file*

Options:
        -t <file>       Use "file" as template for searches.
        -o <file>       Write output to "file" instead of standard output.
        -overwrite      Overwrite each input file with its associated output.
                        Input file is backed up to "file.bak".
        -noback         Do not backup if overwriting.
        -ext<.xxx>      Place output of file1 in "file1.xxx", etc.
        -help           Print out this info.
        -info
        -syntax         Print out info on template file syntax.
        -d              Debug mode on.
        -q              Be quiet, don''t print information messages to STDERR.
        -b<N>           Print <N> blank lines after each file. -b => one blank.
        -m <msg>        Print "msg" after each file processed.
        -f <program>    Pass each file thru program before processing.
                        The string "%s" will be expanded to the filename.
        -c              "Compile" the template into an "inline" perl script,
                        then exit w/o doing any runs.

Use of standard input:
        The filename "-" means the data file is coming from stdin:

                cat file1 | $program -t template -

        (no filenames is equivalent to the above)

Compressed data:
        Files ending in ".gz" or ".Z" are automatically decompressed
        during reading.
        (see also -f above)

$Compression_Usage
--------------------------------------------------------------------
Example template:

$Example
--------------------------------------------------------------------

Do "$program -info" for more information on the format of the template.
EOQ

$TODO = <<'EOQ';        # this is the TODO comment
ItemOrder = 'file' not implemented.
better protection defaults?
catf stuff. (__NEW_FILE leader)
AFTER-N-LINES
If no match to %x=% use Section Zero??
Allow \]''s inside %Eval[`echo [abc\]`]
Should still double check if last char is newline in `cmd` chopping.
What else in %Eval[] needs newline trimming?
EOQ

$Syntax = <<"EOQ";      # make string for "$program -info" or -syntax

============== Syntax for the -t Template file: =====================

# A comment, "#" must be first non-white character in a line.

Keywords:
---------
        %Format, %EndFormat
        %Search
        %Section, %EndSection
        %Grep, %Delete, %Sub
        %Eval

Keywords are also case insensitive. e.g. %Format == %format, etc.
Except for %Eval, a Keyword must be the first non-white characters of a line.

-----------------------------------------------------------------------------
The template file is a schematic model or picture of how the output will
appear. The main components are Sections and Formats.

The idea of $program is to break up the input file into "Sections", defined
by line pattern matches. These Sections are printed to the output. Any
number may be defined in the template. Section matches to the actual input
may happen 0, 1, or more times.

Further selection and transformation of the data inside a given Section may
be prescribed (using %Grep, %Delete, %Sub)

To aid in documenting the output, text regions called "Formats" may be
placed all around the Section outputs. The simplest Format would be a
header. Strings surrounded by %''s in Format are expanded to data words
found in the input file. Output of commands may be substituted with %Eval.
-----------------------------------------------------------------------------
Here is a real example:

$Example
-----------------------------------------------------------------------------

Here is a rundown of each Keyword:
----------------------------------
%Format
<text>
%EndFormat

        The <text> in format will be printed to the output after the input
        file has been processed. Any number of %Format's may be spread over
        the template file. Their order of occurence and placement is
        preserved.

        If <text> has an occurrence like %time=% (the "=" is not needed) in
        it, then while the input file is being processed if a line matching
        "time= <word>" is found the "<word>" is stored and *replaces*
        %time=% in the output. As many %...% variables may be defined as
        one likes.

        The special case %_file_% is expanded to be the current file being
        processed.

        If you need a real % inside the Format text use \\%

        %Eval[...] is expanded (at end of the run, and after %...%
        variables are expanded) to the results of running the command
        inside the [...]. To access UNIX utilities use %Eval[`cmd`].
        Any perl command may be inside [...]
        Mathematical expressions may be computed via %Eval[...]

        E.g.    %Eval[%time=% * 60]
                %Eval[`whoami`]
                %Eval[\$ENV{'PWD'}]

----------------------------------
%Section: /regexp1/
<%commands>
%EndSection: /regexp2/

        A Section of the input file is defined here to start with a line
        matching /regexp1/ and to end with the next line matching /regexp2/

        The additional <%commands> are optional. They may be %Formats or
        others (specifically, %Grep, %Delete, %Sub)

        To exclude the line matching /regexp1/ from the outputted Section,
        use:
                %Section(exclude): /regexp1/
                etc...

        "exclude" is case insensitive, and may be abbreviated to "x" or "e".
        %EndSection(exclude): /regexp2/ does the similar thing for the last
        line.

        To protect meta characters in the regular expressions
        (e.g. '(' or '+') use:

                %Section(protect): /regexp1/

        "protect" is case insensitive, and may be abbreviated to "p".
        Protect them yourself with '\\'s if you prefer.

        Any number of %Formats may be placed inside a %Section template.
        The "%...%" variables in these Formats are looked for only *inside*
        the found Section text. (to be generalized later)

----------------------------------
%Search %<word1>% %<word2>% ...

        The %Search keyword says to search for words as in %Format using
        the % delimiters (e.g. %foo%). The found words can be used then for
        expansion inside ANY %Section. Thus variables found outside a
        Section and be used in the Section''s Formats.

        One is allowed to place a %Search inside a %Section, but it
        doesn''t make much sense at present.

----------------------------------
%Grep: /regexp/

        Print lines matching /regexp/. If outside a %Section then it is for
        the whole file. If inside a %Section only matching Section lines
        make it out.

        /regexp/ may be protected via %Grep(protect): /regexp/ as for
        %Section.

----------------------------------
%Delete: /regexp/

        Do Not Print lines matching /regexp/. If outside a %Section then it
        is for the whole file. If inside a %Section then those lines are
        excluded from the Section text..

        /regexp/ may be protected via %Delete(protect): as above.

----------------------------------
%Sub: /from/to/

        Transform outputted text via regexp /from/to/. If outside a
        %Section then it is for the whole file (but only for that which is
        outputted). If inside a %Section then only those lines inside the
        Section are transformed.

        /from/to/ may be NOT protected via %Sub(protect):. You must protect
        it explicitly with '\\'s.

----------------------------------
Miscellaneous:

    %Eval:
        If you need a real ] inside the %Eval's []''s in the cmd use \\]

        A perl subroutine &sub() is provided so one may do easy regsubs,
        e.g.:
                %Eval[&sub('%foo%','/A/a/g')];

        that is, the word-search value found for "%foo%" will be have all
        the A's replaced by a's.

    %Format:
        Any line in a format beginning with "_EXCLUDE_" (no leading
        white-space) will not be printed in the output. This is how %Search
        is implemented (see above). This is not very useful, but some
        tricks can be concocted from it.
EOQ

# Process command line arguments.  Patterns are tried in order; the first
# argument that does not look like a switch ends option processing and is
# left (with everything after it) in @ARGV as the list of input files.
LOOP:
while (@ARGV) {
    $_ = shift;

    if    ( /^-h.*/ )            { &help(); exit 0; }
    elsif ( /^-info.*|^-syn.*/ ) { &info(); exit 0; }
    elsif ( /^-d$/ )             { $Debug = 'True'; }
    elsif ( /^-over.*/ )         { $Overwrite = 'True'; }
    elsif ( /^-noback.*/ )       { $Backup = ''; }
    elsif ( /^-t$/ )             { $Template = shift; }
    elsif ( /^-f$/ )             { $Filter = shift; }
    elsif ( /^-q$/ )             { $Quiet = 'True'; }
    elsif ( /^-c$/ )             { $Compile_Only = 'True'; }
    elsif ( /^-$/ )              { $DataStdin = 'True'; }
    elsif ( /^-o$/ )             { $OutFile = shift; }
    # The leading "X" marks "option was given" even when its value is empty;
    # it is stripped again right after this loop.
    elsif ( /^-ext(\S*)$/ )      { $Extension = "X$1"; }
    elsif ( /^-b(\d*)$/ )        { $Blank = "X$1"; }
    elsif ( /^-m$/ )             { $Message = shift; }
    elsif ( /^-todo$/ )          { print STDERR "\n$TODO"; exit 1; }
    elsif ( /^--$/ )             { last LOOP; }     # -- means end of switches
    elsif ( /^-(-.*)$/ )         { unshift(@ARGV, $1); }    # --foo => -foo
    elsif ( /^-(..+)$/ ) {
        # split bundled switches: -dq => -d -q
        local($bundle) = $1;
        foreach $x (reverse(split(//, $bundle))) {
            unshift(@ARGV, "-$x");
        }
    }
    elsif ( /^-.+/ ) {
        print STDERR "\n$program: $_ not an option!\ntry: $program -help and $program -info\n\n";
        exit 1;
    }
    else {
        # First non-switch argument: put it back and stop parsing.
        unshift(@ARGV, $_);
        last LOOP;
    }
}

# Check if output file(s) situation:
if ( $Extension =~ /^X(\S*)$/ ) {
    if ( "$1" eq "" ) {
        $Extension = ".out";
    } else {
        $Extension = "$1";
    }
    if ( $OutFile ne "STDOUT" ) {
        print STDERR "$program: \"-ext\" option overriding \"-o <file>\"\n" unless $Quiet;
        $OutFile = "STDOUT";
    }
    if ( $Overwrite ) {
        print STDERR "$program: \"-ext\" option overriding \"-overwrite\"\n" unless $Quiet;
        $Overwrite = '';
    }
}

# Turn "-b<N>" into the literal string of newlines to print after each file.
if ( $Blank =~ /^X(\S*)$/ ) {
    if ( "$1" eq "" ) {
        $Blank = "\n";
    } else {
        $num = "$1";
        $Blank = "\n" x $num;
    }
}
if ( $Message ne '' ) {
    $Blank = $Message."\n".$Blank;
}

if ( $Overwrite && $OutFile ne "STDOUT" ) {
    print STDERR "$program: \"-o <file>\" option overriding \"-overwrite\"\n" unless $Quiet;
    $Overwrite = '';
}

# Check if too much coming from stdin:
if ( $Template eq "-" ) {
    print STDERR "\n$program: template file cannot be STDIN. Use -t <file>\n\n";
    exit 1;
}

if ( ! $Template ) {
    print STDERR "\n$program: Sorry, you must use input search template external to script\n\n";
    exit 1;
}

&parse_template($Template);
&make_fixer_engine();

select STDOUT; $| = 1;

@File_List = @ARGV;

# A lone "-" is consumed by the option loop above (it only sets $DataStdin),
# so it has to be put back here; per the Usage text, giving no file names at
# all also means "read standard input".  The -c check keeps a compile-only
# run from waiting on stdin.
if ( $DataStdin || ( ! @File_List && ! $Compile_Only ) ) {
    unshift(@File_List, '-');
}

&loop_over_input_files();
exit 0;

# Run the generated engine script ($Engine) once for every entry of
# @File_List, feeding it the (possibly filtered or decompressed) file on
# standard input, and route its output according to -o / -overwrite / -ext.
# Removes the engine script when all files are done.
#
# Reads globals:  @File_List $OutFile $TmpFile $Overwrite $Backup $Extension
#                 $Filter %Decompress $Engine $Blank $Quiet
# Sets globals:   $ThisFile $File_To_Read $Compress_Suffix and
#                 $ENV{'FIXER_CURRENT_FILE'} for each file.
sub loop_over_input_files {
    local($out, $regex);

    foreach $file (@File_List) {        # for each file...
        $ThisFile = $file;
        $File_To_Read = $ThisFile;
        $process_then_move_to = '';

        # Destination for THIS file only.  $OutFile itself is left alone so
        # one file's redirection cannot leak into the next one.
        $out = $OutFile;

        if ( $ThisFile eq '-' ) {
            $ENV{'FIXER_CURRENT_FILE'} = "STDIN";
        } else {
            $ENV{'FIXER_CURRENT_FILE'} = $ThisFile;
        }

        # Which known compression suffix (if any) does the name end in?
        # The keys of %Decompress are regexp fragments such as '\.gz'.
        $Compress_Suffix = '';
        foreach $suffix (sort(keys(%Decompress))) {
            $regex = $suffix.'$';
            if ( $ThisFile =~ /$regex/ ) {
                $Compress_Suffix = $suffix;
                last;
            }
        }

        if ( $out eq $ThisFile && $ThisFile ne '-' ) {
            # Output would clobber the input while it is being read:
            # write to a scratch file and move it into place afterwards.
            $process_then_move_to = $ThisFile;
            $out = $TmpFile;
        } elsif ( $Overwrite && $ThisFile ne "-" ) {
            # setup for overwriting.
            &msg("Doing $ThisFile ") unless $Quiet;
            if ( $Backup ) {
                $fileback = "$ThisFile"."$Backup";
                # List-form system: no shell, so odd file names are safe.
                # If the copy fails we must NOT go on, because the next step
                # truncates the original.
                if ( system('cp', $ThisFile, $fileback) != 0 ) {
                    print STDERR "\n$program: cannot back up $ThisFile to $fileback, skipping it\n";
                    next;
                }
                $File_To_Read = $fileback;
                $out = $ThisFile;
                &msg(" (backed up to $fileback)\n") unless $Quiet;
            } else {
                $process_then_move_to = $ThisFile;
                $out = $TmpFile;
            }
        } elsif ( $Extension && $ThisFile ne "-" ) {
            # setup for writing to file.ext
            $out = $ThisFile;
            if ( $Compress_Suffix ) {
                $regex = $Compress_Suffix.'$';
                $out =~ s/$regex//;
            }
            $out .= "$Extension";
            &msg("$ThisFile -> $out\n") unless $Quiet;
        }

        # Build the command that produces the data stream for the engine.
        $cmd = '';
        if ( $Filter ) {
            # The user's filter is used as typed; any quoting around %s is
            # the user's business.
            $cmd = $Filter;
            $cmd =~ s/%s/$File_To_Read/g;
            &msg("(Filtering input via: $cmd)\n") unless $Quiet;
        } else {
            local($quoted) = &quote_for_shell($File_To_Read);
            $decompress = '';
            if ( $Compress_Suffix ne '' ) {
                $decompress = $Decompress{$Compress_Suffix};
                $decompress =~ s/%s/$quoted/g;
            }
            if ( $decompress ne '' ) {
                &msg("(Decompressing via: $decompress)\n") unless $Quiet;
                $cmd = $decompress;
            } else {
                $cmd = "cat $quoted";
            }
        }

        # "-" or the word STDOUT (any case) means the terminal; anything
        # else -- including names that merely contain "stdout" -- is a file.
        if ( $out eq '-' || $out =~ /^stdout$/i ) {
            system("$cmd | $Engine");
        } else {
            system("$cmd | $Engine > " . &quote_for_shell($out));
        }

        if ( $process_then_move_to ne '' ) {
            system('mv', $out, $process_then_move_to);
        }
        if ( $Blank ne '' ) {
            print STDOUT $Blank;
        }
    }
    # Done, remove "engine"
    unlink("$Engine");
}

# Return the argument wrapped in single quotes so that /bin/sh sees it as
# exactly one word, whatever characters it contains.
sub quote_for_shell {
    local($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Show a block of text to the user: through $PAGER if set and startable,
# else through "more", else straight to STDERR.
# Argument: the text to show; defaults to $Usage when empty or omitted.
sub help {
    local($data) = @_;
    $data = $Usage if $data eq '';
    if ( $ENV{'PAGER'} ne '' && open(PAGER, "|$ENV{'PAGER'}") ) {
        # open pipeline to user's pager
        print PAGER "$data";
        close(PAGER);
    } elsif ( open(MORE, "|more") ) {
        # open pipeline to "more"
        print MORE "$data";
        close(MORE);
    } else {
        print STDERR "$data";
    }
}

# Show the template-syntax description ($Syntax) via &help.
sub info {
    &help($Syntax);
}

# Set the defaults of all option globals and build the two strings
# ($Compression_Usage, $Example) that the Usage/Syntax texts interpolate.
# Needs $program to be set already (used for the temp file names).
sub initialize_global_variables {
    $Overwrite = '';
    $Backup = '.bak';
    $Extension = '';
    $Template = '';
    $OutFile = "STDOUT";        # sentinel meaning "standard output"
    $DataStdin = '';
    $Blank = '';
    $Message = '';
    $Filter = '';
    $Quiet = '';
    $Compile_Only = '';
    $Debug = '';

    # NOTE(review): predictable names in /tmp; $$ is the process id.
    $Engine = "/tmp/${program}_engine$$";
    # $Engine = "./engine";
    $TmpFile = "/tmp/${program}_tmp$$";

    # Keys are regexp fragments matched against the end of a file name;
    # values are shell commands in which %s stands for the file name.
    $Decompress{'\.Z'} = 'zcat %s';
    $Decompress{'\.gz'} = 'gzip -dc %s';
    $Compress_Suffix = '';

    $Compression_Usage = "\t\tSuffix:\t \tDecompression cmd:\n";
    local($key, $suff);
    foreach $key (sort(keys(%Decompress))) {
        $suff = $key;
        $suff =~ s/\\//g;       # show ".gz", not "\.gz"
        $Compression_Usage .= "\t\t$suff\t=>\t$Decompress{$key} \| ...\n";
    }

    $ItemOrder = 'template';

    foreach $key ('_file_') {
        $FormatSkip{$key} = '1';
    }

    $Example = <<"EOQ";
# This first Format is the header:
%Format
File: %_file_%
Uf=%u2=% N= %n=%x%n=% L= %l=% beta= %Eval[%l=%*%dtau=%]
(This work was performed on %Eval[`date`])
%EndFormat

# Here is the first section we want:
%Section(X): /sh 1 xx Spin correlation function:/
%EndSection: BLANK

# Here is the 2nd section we want:
%Section(P,X): /G(nx,ny,ti) SHEET 2:/
%Format
(Hi Mom!)
tau G(tau) St dev
%EndFormat
%Sub: /\\+\\-//
%EndSection(X): /nx = 0 ny = 1/
EOQ
}

sub parse_template {
    local($template) = @_;
    local($print_items) = '';

    open(TEMPLATE, "$template") || die "$program: can't open $template";

    $Sections = '0';
    $Items = '0';
    $Curr_Section = '0';

    while (