aboutsummaryrefslogtreecommitdiffstats
path: root/http.req
blob: 5d04d0c63d0e1762fa57fa2d80326b971a9ec180 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/perl

# This file can find requirements of html and jhtml files (cgi, gif,
# java dependencies).  It is a bit of a hack but it turns out to work
# well.  We track only dependencies between relative URLs; absolute
# URLs are assumed to be external to the RPM system.  We do not
# parse the HTML but look through the set of strings (text surrounded
# by quotes) for something which looks like a reference.  This avoids
# writing a full HTML parser and tends to work really well.  In this
# manner we can track dependencies for: href, src, action and other
# HTML tags which have not been invented yet.


# The reference:
#
#	href="http://www.perl.org/images/arrow.gif"
#
# does not create a dependency but the reference
#
#	href="images/arrow.gif"
#
# will create a dependency.  

# Additionally this program will find the requirements for Sun jhtml
# (html with embedded java).  Since jhtml is deprecated, so is this
# part of the code.

# These references create dependencies:

#	<form action="signup.jhtml" method="POST">
#
#	<img src="images/spacer.gif" width=1>
#
#	<A HREF="signup.jhtml">
#
#	adWidget.writeAd(out, "login.html", "expired");
#
#	response.sendRedirect("http://"+request.getServerName()+"/mailcom/login.jhtml");


# Notice how we look for strings WITH the proper ending. This is
# because the java sometimes has really strange double quoting
# conventions.  Look at how splitting out the strings in this
# fragment would get you the wrong text.

#      <img src="`c.getImage("bhunterlogo.gif")`" width=217 >

# Ignore non relative references since these dependencies can not be
# met. (ie, no package you install will ever provide
# 'http://www.yahoo.com').

# I use basename since I have seen too many http references which
# begin with '../' and I can not figure out where the document root
# is for the webserver; this would just kill the dependency tracking
# mechanism.



use File::Basename;

# Pattern matching the file name extensions which are treated as
# requirements.  process_file() interpolates it into the regular
# expression it uses to pick references out of quoted strings.

$DEPS_PAT = '\.((cgi)|(ps)|(pdf)|(png)|(jpg)|(gif)|(tiff)|(tif)|(xbm)|(html)|(htm)|(shtml)|(jhtml))'; #'

# The names of the files to scan come from the command line or, when
# there are no arguments, from standard input (one name per line).
#
# Test the number of arguments, not the string "@ARGV": that string is
# false for a single file named "0", which would wrongly send us off
# to read standard input.  Named loop variables are used instead of
# $_ so that nothing process_file() does can alter our lists.

if (@ARGV) {
  foreach my $file (@ARGV) {
    process_file($file);
  }
} else {

  # notice we are passed a list of filenames NOT as common in unix the
  # contents of the file.  Read the names one at a time; process_file
  # removes the trailing newline.

  while (defined(my $file = <STDIN>)) {
    process_file($file);
  }
}


# Each requirement was recorded once as a key of %seen; print them
# sorted, one per line.

foreach my $key (sort keys %seen) {
  print "$key\n";
}


sub process_file {

  # Scan one html/jhtml file and record every requirement it implies
  # as a key of the global hash %seen.
  #
  # Arguments:
  #   $file - name of the file to scan.  A trailing newline (present
  #           when the name was read from a list of names) is removed.
  #
  # Side effects:
  #   Sets $seen{"http(NAME)"} for each quoted relative reference
  #   ending in one of the extensions of $DEPS_PAT, where NAME is the
  #   basename of the reference, and $seen{"java(CLASS)"} for each
  #   class listed in a jhtml <java type=import> or
  #   <java type=extends> tag.
  #
  # Dies if the file can not be opened or closed.  Returns nothing.

  my ($file) = @_;
  chomp $file;

  # Three argument open with a lexical handle: the name is always
  # taken literally, even if it starts with '>' or '|' or has leading
  # or trailing whitespace.

  open(my $fh, '<', $file)||
    die("$0: Could not open file: '$file' : $!\n");

  # we have to suck in the whole file at once because too many people
  # split lines around <java></java> tags.

  my (@file) = <$fh>;

  close($fh)||
    die("$0: Could not close file: '$file' : $!\n");

  # Work on a lexical copy rather than $_, a caller looping with
  # 'foreach (@list)' has $_ aliased to its list element and an
  # assignment to $_ here would overwrite that element.

  my $text = "@file";

  # ignore multi-line comments (use non greedy operators, and /s so
  # that '.' matches the newlines inside the comment).  HTML comments
  # go first: the common '// -->' closing line must still be there to
  # terminate its comment when we get to the line based comments.

  $text =~ s/<!--.*?-->//gs;

  # ignore line based comments ( careful although it has two slashes
  # 'http://www.yahoo.com' is not a comment! )

  $text =~ s!^\s*//.*$!!mg;
  $text =~ s!//\s.*$!!mg;
  $text =~ s!\s//.*$!!mg;

  $text =~ s!/\*.*?\*/!!gs;

  # Notice how we look for strings WITH the proper ending. This is
  # because the java sometimes has really strange double quoting
  # conventions.  Look at how splitting out the strings in this
  # fragment would get you the wrong text.

  #      <img src="`c.getImage("bhunterlogo.gif")`" width=217 >

  while ( $text =~ m{\"([^\"]+$DEPS_PAT)\"}g ) {
    my $string = $1;
    chomp $string;

    # Ignore non relative references since these dependencies can not
    # be met. (ie, no package you install will ever provide
    # 'http://www.yahoo.com').  https and ftp URLs are just as
    # absolute as http ones.

    next if ( $string =~ m!(?:https?|ftp)://!i );

    # I use basename since I have seen too many http references which
    # begin with '../' and I can not figure out where the document
    # root is for the webserver; this would just kill the dependency
    # tracking mechanism.

    $string = basename($string);
    $string =~ s!\s+!!g;
    $seen{"http(${string})"} = 1;
  }

  # This section is only for use with (Sun) jhtml dependencies, and
  # since jhtml is deprecated so is this code.

  # java imports in jhtml (may have stars for leaf class)
  # these may span several lines

  while ( $text =~ m!<java type=(?:import|extends)>\s*([^<]+)\s*<!g ) {
    my $java_list = $1;
    $java_list =~ s/;/ /g;

    # split(' ') splits on any run of whitespace (newlines included)
    # and skips leading whitespace, so a list starting with ';' does
    # not produce an empty class name and a bogus 'java()' entry.

    foreach my $java_class ( split(' ', $java_list) ) {
      $seen{"java(${java_class})"} = 1;
    }
  }

  return ;
}