-
-
Notifications
You must be signed in to change notification settings - Fork 306
/
Copy pathmkindex.pl
executable file
·155 lines (130 loc) · 3.95 KB
/
mkindex.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/perl -w
# Build an index of words in the file index-words that are found in the text.
# Words are compared case insensitively except for those starting with a dash
# (i.e. program option names). "Words" may actually be phrases consisting of
# more than one word separated by a space. The word as written in the index is
# as found in the file (i.e. using that case).
use feature "fc";
# import shared function
use lib '.';
require "urlify.pl";
# Normalize a keyword for lookups.
#
# A keyword that begins with a dash is treated as a program option name and
# is handed back untouched, which keeps option lookups case-sensitive. Any
# other keyword is returned case-folded, which makes ordinary word lookups
# case-insensitive.
sub folded {
    my ($keyword) = @_;
    my $is_option = ($keyword =~ /^-/);
    return $is_option ? $keyword : fc($keyword);
}
# get all markdown files as arguments
my @files = @ARGV;

# Load the index words, one per line, from the file "index-words".
#
# %index maps the folded form of every entry (see folded()) to the entry
# exactly as written in the file. @lwords additionally collects the folded
# form of every entry that contains a space or a period.
#
# A lexical handle and three-argument open are used so this loop does not
# share a global bareword handle with the rest of the script, and the reason
# for a failed open is included in the error message.
open(my $words_fh, '<', 'index-words')
    or die "no words: $!";
while(my $entry = <$words_fh>) {
    chomp $entry;
    # A blank or whitespace-only line is not an index word; a lone space
    # would otherwise become a "phrase" that matches almost every line.
    next unless $entry =~ /\S/;
    if($entry =~ /[ .]/) {
        # word with spaces or periods
        push @lwords, folded($entry);
    }
    $index{folded($entry)} = $entry;
}
close($words_fh);
# Scan one markdown file and record, for every index word found in it, a
# link to the section in which the word first shows up in that file.
#
# Argument:
#   $fname - path of the markdown file; it is also used verbatim as the file
#            part of the generated link targets ("$fname#anchor").
#
# Globals read:    %index  (folded word => word as written)
#                  @lwords (folded entries containing a space or period)
# Globals written: %word   (word/file pairs that were already recorded)
#                  %all    (word => ", "-separated "[section](url)" links)
#
# A file that cannot be opened is reported on STDERR and skipped.
sub single {
    my ($fname) = @_;
    my $section;                # text of the most recent heading
    my $url;                    # link target of the most recent heading
    my $in_code_section = 0;

    # Three-argument open with a lexical handle: the name comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', $fname) or do {
        warn "cannot open $fname: $!\n";
        return;
    };

    # Add a link to the current section for $w, at most once per file.
    my $record = sub {
        my ($w) = @_;
        return if $word{$w}{$fname};
        $word{$w}{$fname}++;
        $all{$w} .= ($all{$w} ? ", " : "") . "[$section]($url)";
    };

    while(my $line = <$fh>) {
        chomp $line;
        # Text that phrases are searched in: the whole line, or just the
        # cleaned-up heading text when the line is a heading.
        my $text = $line;

        # Track whether we are within a markdown code block that begins/ends with ```
        if($line =~ /^\`\`\`.*/) {
            $in_code_section = $in_code_section ? 0 : 1;
        }

        # Headings are only recognized outside code blocks, where a leading
        # "#" is not markdown markup.
        if(!$in_code_section) {
            if($line =~ /^(#[\#]*) ([^\{]*)(\{\#(.*)\})/) {
                # This section header has an explicit ID specified, e.g. "#Foo {#foo}"
                $section = $2;
                my $dest_id = $4;
                # trim whitespace off end of section
                $section =~ s/\s+$//;
                $url = "$fname#$dest_id";
                # Use only the anchor text when a section heading has an anchor tag
                if($section =~ m/(.*)\[(.*)\]\(.*\).*/) {
                    $section = "$1$2";
                }
                $text = $section;
            }
            elsif($line =~ /^(#[\#]*) (.*)/) {
                # This section header has no explicit ID specified, e.g. "#Foo"
                $section = $2;
                # trim whitespace off end of section
                $section =~ s/\s+$//;
                # The anchor is whatever urlify() makes of the heading text
                my $url_section = urlify($section);
                $url = "$fname#$url_section";
                # Use only the anchor text when a section heading has an anchor tag
                if($section =~ m/(.*)\[(.*)\]\(.*\).*/) {
                    $section = "$1$2";
                }
                $text = $section;
            }
        }

        # Nothing can be linked before the first heading: recording a word
        # here would emit an empty "[]()" link and block the real one later.
        next unless defined $url;

        # Single words: split on spaces and parentheses, strip punctuation.
        my @words = split(/[ \(\)]+/, $line);
        for my $w (@words) {
            $w =~ s/[,\.\`\'\]\[]//g;
            $w = folded($w);
            $record->($w) if $index{$w};
        }

        # check longer words
        # These are searched as literal substrings, so a period in an entry
        # only matches a period. Entries starting with a dash are option
        # names and are compared case-sensitively against the raw text; all
        # others are already folded and are compared against the folded text.
        my $folded_text = fc($text);
        foreach my $w (@lwords) {
            my $haystack = ($w =~ /^-/) ? $text : $folded_text;
            $record->($w) if index($haystack, $w) >= 0;
        }
    }
    close($fh);
}
# Feed every file in @files through single() to collect the index entries,
# then start the generated document with its top-level title.
foreach my $markdown_file (@files) {
    single($markdown_file);
}

print "# Index\n\n";
# Build the key an index word is sorted (and grouped) by: the word in upper
# case with any leading run of "-", "<", "/" and "." characters removed.
sub sorting {
    my ($word) = @_;
    my $key = uc($word);        # compare without regard to case
    $key =~ s/^[-<\/.]+//;      # ignore leading punctuation
    return $key;
}
# Comparator for sort(): orders two index words ($a and $b) by their
# sorting() keys. When the keys compare equal, the words themselves are
# compared as plain strings so that the order is still deterministic.
sub byname {
    return (sorting($a) cmp sorting($b)) || ($a cmp $b);
}
# Print the collected index, sorted with byname(). Words are grouped under a
# "## X" heading, where X is the first character of the word's sorting() key;
# each heading is printed once, before the first word that belongs to it.
my %letter;
foreach my $w (sort byname keys %all) {
    my $initial = substr(sorting($w), 0, 1);
    if(!$letter{$initial}) {
        $letter{$initial}++;
        # Make sure headings have blank lines before and after
        print "\n## $initial\n\n";
    }
    # Plain print, not printf: the index word is data, and a "%" in it must
    # not be interpreted as a format directive.
    print " - " . $index{$w} . ": " . $all{$w} . "\n";
}