aboutsummaryrefslogtreecommitdiffstats
path: root/http.req
diff options
context:
space:
mode:
Diffstat (limited to 'http.req')
-rwxr-xr-xhttp.req165
1 files changed, 165 insertions, 0 deletions
diff --git a/http.req b/http.req
new file mode 100755
index 0000000..5d04d0c
--- /dev/null
+++ b/http.req
@@ -0,0 +1,165 @@
+#!/usr/bin/perl
+
+# This file can find requirements of html and jhtml files (cgi, gif,
+# java dependencies). It is a bit of a hack but it turns out to work
+# well. We track only dependencies between relative URLs; absolute
+# URLs are assumed to be external to the RPM system. We do not
+# parse the HTML but look through the set of strings (text surrounded
+# by quotes) for something which looks like a reference. This avoids
+# writing a full HTML parser and tends to work really well. In this
+# manner we can track dependencies for: href, src, action and other
+# HTML tags which have not been invented yet.
+
+
+# The reference:
+#
+# href="http://www.perl.org/images/arrow.gif"
+#
+# does not create a dependency but the reference
+#
+# href="images/arrow.gif"
+#
+# will create a dependency.
+
+# Additionally this program will find the requirements for Sun jhtml
+# (html with embedded java). Since jhtml is deprecated, so is this
+# part of the code.
+
+# These references create dependencies:
+
+# <form action="signup.jhtml" method="POST">
+#
+# <img src="images/spacer.gif" width=1>
+#
+# <A HREF="signup.jhtml">
+#
+# adWidget.writeAd(out, "login.html", "expired");
+#
+# response.sendRedirect("http://"+request.getServerName()+"/mailcom/login.jhtml");
+
+
+# Notice how we look for strings WITH the proper ending. This is
+# because the java sometimes has really strange double quoting
+# conventions. Look at how splitting out the strings in this
+# fragment would get you the wrong text.
+
+# <img src="`c.getImage("bhunterlogo.gif")`" width=217 >
+
+# Ignore non relative references since these dependencies can not be
+# met. (ie, no package you install will ever provide
+# 'http://www.yahoo.com').
+
+# I use basename since I have seen too many http references which
+# begin with '../' and I can not figure out where the document root
+# is for the webserver; keeping the directory part would just kill
+# the dependency tracking mechanism.
+
+
+
+use File::Basename;
+
+# Extensions which, when they end a quoted string, mark that string as
+# a reference to report as a requirement.  process_file() interpolates
+# this pattern into the regex it uses to pick out quoted strings.
+
+$DEPS_PAT = '\.((cgi)|(ps)|(pdf)|(png)|(jpg)|(gif)|(tiff)|(tif)|(xbm)|(html)|(htm)|(shtml)|(jhtml))'; #'
+
+# Test the argument COUNT rather than the joined string: "@ARGV" is
+# false for a lone argument of "0" (a legal file name), which would
+# wrongly send us off to read STDIN.  The loops use named lexicals so
+# that $_ is never aliased to our data while process_file() runs.
+
+if (@ARGV) {
+    foreach my $file (@ARGV) {
+        process_file($file);
+    }
+} else {
+
+    # notice we are passed a list of filenames on STDIN, NOT (as is
+    # common in unix) the contents of the file.  process_file() chomps
+    # the trailing newline itself.
+
+    while (defined(my $file = <STDIN>)) {
+        process_file($file);
+    }
+}
+
+
+
+# Print every requirement collected in %seen, one per line, sorted.
+
+foreach my $requirement (sort keys %seen) {
+    print "$requirement\n";
+}
+
+
+# process_file($file)
+#
+# Scan one html/jhtml file and record what it appears to require in
+# the global %seen hash (keys are the requirement strings, values 1):
+#
+#   http(NAME)   for each double-quoted string which ends in one of
+#                the extensions in $DEPS_PAT and does not contain an
+#                absolute URL.  NAME is the basename of the string
+#                with all whitespace removed.
+#   java(CLASS)  for each class listed inside a <java type=import>
+#                or <java type=extends> tag.
+#
+# $file may carry a trailing newline (names read from STDIN do); it
+# is chomped here.  Dies if the file can not be opened or closed.
+# Returns nothing.
+
+sub process_file {
+
+    my ($file) = @_;
+    chomp $file;
+
+    # Three-argument open with a lexical handle: the name is taken
+    # literally, so blanks or a leading '>' or '|' in it can not
+    # change how the file is opened.
+
+    open(my $fh, '<', $file)||
+        die("$0: Could not open file: '$file' : $!\n");
+
+    # we have to suck in the whole file at once because too many people
+    # split lines around <java></java> tags.  The text is kept in a
+    # lexical, not $_, so a caller looping with $_ is left untouched.
+
+    my $text = join('', <$fh>);
+
+    close($fh)||
+        die("$0: Could not close file: '$file' : $!\n");
+
+    # ignore line based comments ( careful although it has two slashes
+    # 'http://www.yahoo.com' is not a comment! )
+
+    $text =~ s!^\s*//.*$!!mg;
+    $text =~ s!//\s.*$!!mg;
+    $text =~ s!\s//.*$!!mg;
+
+    # ignore multi-line comments (use non greedy operators).  The /s
+    # modifier is what lets '.' cross newlines; without it only
+    # comments that fit on a single line are removed.
+
+    $text =~ s!/\*.*?\*/!!gs;
+    $text =~ s/<!--.*?-->//gs;
+
+    # Notice how we look for strings WITH the proper ending. This is
+    # because the java sometimes has really strange double quoting
+    # conventions. Look at how splitting out the strings in this
+    # fragment would get you the wrong text.
+
+    # <img src="`c.getImage("bhunterlogo.gif")`" width=217 >
+
+    while ( $text =~ m{\"([^\"]+$DEPS_PAT)\"}g ) {
+        my $string = $1;
+
+        # Ignore non relative references since these dependencies can
+        # not be met. (ie, no package you install will ever provide
+        # 'http://www.yahoo.com').  https and ftp URLs are just as
+        # external as http ones.
+
+        next if ( $string =~ m!(?:https?|ftp)://!i );
+
+        # Use basename since too many references begin with '../' and
+        # we can not figure out where the document root is for the
+        # webserver.
+
+        $string = basename($string);
+        $string =~ s!\s+!!g;
+        $seen{"http(${string})"} = 1;
+    }
+
+    # This section is only for use with (Sun) jhtml dependencies, and
+    # since jhtml is deprecated so is this code.
+
+    # java imports in jhtml (may have stars for leaf class)
+    # these may span several lines
+
+    while ( $text =~ m!<java type=(?:import|extends)>\s*([^<]+)\s*<!g ) {
+        my $java_list = $1;
+        $java_list =~ s/;/ /g;
+
+        # split(' ') skips leading whitespace and treats any run of
+        # blanks or newlines as one separator, so no empty class name
+        # is ever recorded.
+
+        foreach my $java_class ( split(' ', $java_list) ) {
+            $seen{"java(${java_class})"} = 1;
+        }
+    }
+
+    return ;
+}