aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Fandrich <danf@mageia.org>2026-03-24 19:40:04 -0700
committerDan Fandrich <danf@mageia.org>2026-03-24 20:19:03 -0700
commit16831c6f788ff8053c594bafa19881a6676bfaf3 (patch)
tree966e25d80f48086166fa9e1e05c2b95e951e6923
parent70856b76e17a6e16a5557e60d80e6e89c178588b (diff)
downloadmgaadvisories-master.tar
mgaadvisories-master.tar.gz
mgaadvisories-master.tar.bz2
mgaadvisories-master.tar.xz
mgaadvisories-master.zip
Detect duplicated words or space characters HEAD master
These are real typos more than 95% of the time (based on current statistics) and are worth eliminating.
-rw-r--r--NEWS1
-rw-r--r--lib/MGA/Advisories.pm19
2 files changed, 20 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index 9024244..3a8995d 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,7 @@ Version 0.X
- template: move CVEs into the "upstream" field in advisory.json
- add sanity checks for CVE identifiers
+- look for typos involving duplicate words or spaces
Version 0.34
diff --git a/lib/MGA/Advisories.pm b/lib/MGA/Advisories.pm
index e4ff62c..2f6827e 100644
--- a/lib/MGA/Advisories.pm
+++ b/lib/MGA/Advisories.pm
@@ -301,6 +301,25 @@ sub assign_id {
return;
}
+ # Look for errors in the subject and description
+ if ( $adv->{subject} =~ /\b(\w+)\s+\1\s/ ) {
+ print STDERR "Subject has a duplicated word\n";
+ return;
+ }
+ if ( $adv->{subject} =~ /\s\s/ ) {
+ print STDERR "Subject has an extra space character\n";
+ return;
+ }
+ # As of this writing, 0.03% of advisories have a word duplicated
+ # legitimately in the description, which this check would disallow. But
+ # given that this check succeeds in eliminating these typos 95% of the
+ # time, it's probably worth forcing the user to reword the description in
+ # these rare cases.
+ if ( $adv->{description} =~ /\b(\w+)\s+\1\s/ ) {
+ print STDERR "Description has a duplicated word\n";
+ return;
+ }
+
# Turn on autoflush
$|++;