#!/usr/bin/perl

# This file can find requirements of html and jhtml files (cgi, gif,
# java dependencies).  It is a bit of a hack but it turns out to work
# well.  We track only dependencies between relative URLs; absolute
# URLs are assumed to be external to the RPM system.  We do not
# parse the HTML but look through the set of strings (text surrounded
# by quotes) for something which looks like a reference.  This avoids
# writing a full HTML parser and tends to work really well.  In this
# manner we can track dependencies for: href, src, action and other
# HTML tags which have not been invented yet.


# The reference:
#
#	href="http://www.perl.org/images/arrow.gif"
#
# does not create a dependency but the reference
#
#	href="images/arrow.gif"
#
# will create a dependency.  

# Additionally this program will find the requirements for Sun jhtml
# (html with embedded java).  Since jhtml is deprecated, so is this
# part of the code.

# These references create dependencies:

#	<form action="signup.jhtml" method="POST">
#
#	<img src="images/spacer.gif" width=1>
#
#	<A HREF="signup.jhtml">
#
#	adWidget.writeAd(out, "login.html", "expired");
#
#	response.sendRedirect("http://"+request.getServerName()+"/mailcom/login.jhtml");


# Notice how we look for strings WITH the proper ending. This is
# because the java sometimes has really strange double quoting
# conventions.  Look at how splitting out the strings in this
# fragment would get you the wrong text.

#      <img src="`c.getImage("bhunterlogo.gif")`" width=217 >

# Ignore non relative references since these dependencies can not be
# met. (ie, no package you install will ever provide
# 'http://www.yahoo.com').

# I use basename since I have seen too many http references which
# begin with '../' and I cannot figure out where the document root
# is for the webserver; keeping the directory part would just kill
# the dependency tracking mechanism.



use File::Basename;

# This is the pattern of file extensions which are treated as
# requirements.  It is kept as a package global (not a lexical)
# because process_file() reads it by name.

our $DEPS_PAT = '\.((cgi)|(ps)|(pdf)|(png)|(jpg)|(gif)|(tiff)|(tif)|(xbm)|(html)|(htm)|(shtml)|(jhtml))'; #'

# The set of requirements found so far; process_file() adds the keys.
# Also a package global so that process_file() can reach it by name.

our %seen;

# Test the number of arguments, not their stringification: a single
# argument named '0' would otherwise look like "no arguments".

if (@ARGV) {
  foreach my $file (@ARGV) {
    process_file($file);
  }
} else {

  # notice we are passed a list of filenames NOT as common in unix the
  # contents of the file.

  while (defined(my $file = <STDIN>)) {
    chomp $file;

    # a blank line is not a filename, skip it rather than abort the
    # whole run trying to open ''.
    next if $file eq '';

    process_file($file);
  }
}



# Print each requirement exactly once, in a stable (sorted) order.

foreach my $key (sort keys %seen) {
  print "$key\n";
}


# process_file($file)
#
# Scan one html/jhtml file and record every requirement found in it as
# a key of the package global %seen:
#
#   http(NAME)   for each quoted string which ends in one of the
#                extensions of $DEPS_PAT and is not an absolute URL;
#                NAME is the basename of the reference with all
#                whitespace removed.
#   java(CLASS)  for each class listed in a jhtml
#                <java type=import> or <java type=extends> tag.
#
# $file is the name of the file to scan (a trailing newline is
# removed).  Dies if the file can not be opened or closed.  Returns
# nothing.

sub process_file {

  my ($file) = @_;
  chomp $file;

  # Both are package globals owned by the main script: the extension
  # pattern (only read here) and the set of requirements found so far.

  our ($DEPS_PAT, %seen);

  # Three argument open with a lexical handle: the filename is taken
  # literally, so names with leading/trailing blanks or characters
  # like '>' and '|' can not change the open mode.

  open(my $fh, '<', $file)
    or die("$0: Could not open file: '$file' : $!\n");

  # we have to suck in the whole file at once because too many people
  # split lines around <java></java> tags.  A lexical is used so the
  # caller's $_ is left alone.

  my $content = do { local $/; <$fh> };
  $content = '' unless defined $content;

  close($fh)
    or die("$0: Could not close file: '$file' : $!\n");

  # ignore line based comments ( careful although it has two slashes
  # 'http://www.yahoo.com' is not a comment! )

  $content =~ s!^\s*//.*$!!mg;
  $content =~ s!//\s.*$!!mg;
  $content =~ s!\s//.*$!!mg;

  # ignore multi-line comments
  # (use non greedy operators, and /s so that '.' also matches the
  # newlines inside the comment)

  $content =~ s!/\*.*?\*/!!gs;
  $content =~ s/<!--.*?-->//gs;

  # Ignore non relative references since these dependencies can not be
  # met. (ie, no package you install will ever provide
  # 'http://www.yahoo.com').  Any 'scheme://' counts as absolute, as
  # does a protocol-relative reference which begins with '//'.

  # I use basename since I have seen too many http references which
  # begin with '../' and I cannot figure out where the document root
  # is for the webserver; keeping the directory part would just kill
  # the dependency tracking mechanism.

  # Notice how we look for strings WITH the proper ending. This is
  # because the java sometimes has really strange double quoting
  # conventions.  Look at how splitting out the strings in this
  # fragment would get you the wrong text.

  #      <img src="`c.getImage("bhunterlogo.gif")`" width=217 >

  while ( $content =~ m{\"([^\"]+$DEPS_PAT)\"}g ) {
    my $reference = $1;

    next if $reference =~ m{[A-Za-z][A-Za-z0-9+.\-]*://};
    next if $reference =~ m{\A\s*//};

    my $name = basename($reference);
    $name =~ s!\s+!!g;
    $seen{"http(${name})"} = 1;
  }

  # This section is only for use with (Sun) jhtml dependencies, and
  # since jhtml is deprecated so is this code.

  # java imports in jhtml (may have stars for leaf class)
  # these may span several lines

  while ( $content =~ m!<java type=(?:import|extends)>\s*([^<]+)\s*<!g ) {
    my $java_list = $1;

    # classes are separated by semicolons and/or any whitespace;
    # split ' ' also drops leading blanks so that no empty class name
    # is ever recorded.
    $java_list =~ s/;/ /g;

    foreach my $java_class ( split(' ', $java_list) ) {
      $seen{"java(${java_class})"} = 1;
    }
  }

  return;
}