modules/text/slug.zzm

text-slug-0.0.1 source code

Package

Name
text-slug
Version
0.0.1
Uploaded
2026-06-15 14:41:04
Dependencies
Metadata
zuzu-distribution.json
Archive
Download .tar.gz
=encoding utf8

=head1 NAME

text/slug - Stable text slugs for URLs and filenames.

=head1 SYNOPSIS

  from text/slug import slugify, slug_words, filename_slug, unique_slug;

  say( slugify("A Useful Thing!") );                 // a-useful-thing
  say( filename_slug("Quarterly Report", "pdf") );   // quarterly-report.pdf
  say( unique_slug([ "post", "post-2" ], "Post") );  // post-3

=head1 DESCRIPTION

This module turns text into stable slugs. Whitespace and punctuation
become separators, common non-ASCII letters are folded to ASCII, output is
lower-case by default, and callers may supply stop words.

=head1 EXPORTED FUNCTIONS

=over

=item * C<< slug_words(String text, Dict options?) >>

Return the normalized slug words as an array.

=item * C<< slugify(String text, Dict options?) >>

Return a separator-joined slug. Supported options are C<separator>,
C<lowercase>, C<ascii>, C<stop_words>, and C<fallback>.

=item * C<< filename_slug(String title, String extension?) >>

Return a conservative filename using C<title> as the basename and
C<extension> as an optional suffix.

=item * C<< unique_slug(existing, String title, Dict options?) >>

Return a slug for C<title> that does not already appear in C<existing>.
C<existing> may be an Array, Bag, Set, Dict, or PairList.

=back

=head1 COPYRIGHT AND LICENCE

B<< text/slug >> is copyright Kit Calder.

It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.

=cut

from std/string import index, join, replace, split, substr, trim;

// ASCII folding table used by _fold_char.
//
// Each row is [ source_characters, replacement ]: any single character found
// in row[0] is replaced by row[1]. Rows are scanned in order and the first
// match wins, so a character must appear in only one row.
//
// Both lower-case and upper-case forms are listed. Folding is applied to the
// text before it is lower-cased, so an upper-case accented letter that is
// missing from this table would survive folding and then be discarded as a
// separator by the [^A-Za-z0-9] filter in slug_words.
//
// Keep every source character as a single precomposed code point. In
// particular U+0149 (n preceded by apostrophe) must not be decomposed into
// U+02BC followed by "n", or the modifier apostrophe itself would fold to "n".
const _ASCII_FOLDS := [
    [ "àáâãäåāăąǎǟǡǻȁȃạảấầẩẫậắằẳẵặ", "a" ],
    [ "ÀÁÂÃÄÅĀĂĄǍǞǠǺȀȂẠẢẤẦẨẪẬẮẰẲẴẶ", "A" ],
    [ "çćĉċč", "c" ],
    [ "ÇĆĈĊČ", "C" ],
    [ "ďđð", "d" ],
    [ "ĎĐÐ", "D" ],
    [ "èéêëēĕėęěȅȇẹẻẽếềểễệ", "e" ],
    [ "ÈÉÊËĒĔĖĘĚȄȆẸẺẼẾỀỂỄỆ", "E" ],
    [ "ƒ", "f" ],
    [ "Ƒ", "F" ],
    [ "ĝğġģ", "g" ],
    [ "ĜĞĠĢ", "G" ],
    [ "ĥħ", "h" ],
    [ "ĤĦ", "H" ],
    [ "ìíîïĩīĭįıǐȉȋịỉ", "i" ],
    [ "ÌÍÎÏĨĪĬĮİǏȈȊỊỈ", "I" ],
    [ "ĵ", "j" ],
    [ "Ĵ", "J" ],
    [ "ķ", "k" ],
    [ "Ķ", "K" ],
    [ "ĺļľŀł", "l" ],
    [ "ĹĻĽĿŁ", "L" ],
    [ "ñńņňŉ", "n" ],
    [ "ÑŃŅŇ", "N" ],
    [ "òóôõöøōŏőǒǿȍȏọỏốồổỗộơớờởỡợ", "o" ],
    [ "ÒÓÔÕÖØŌŎŐǑǾȌȎỌỎỐỒỔỖỘƠỚỜỞỠỢ", "O" ],
    [ "ŕŗř", "r" ],
    [ "ŔŖŘ", "R" ],
    [ "śŝşšș", "s" ],
    [ "ŚŜŞŠȘ", "S" ],
    [ "ß", "ss" ],
    [ "ẞ", "SS" ],
    [ "ţťŧț", "t" ],
    [ "ŢŤŦȚ", "T" ],
    [ "ùúûüũūŭůűųǔȕȗụủưứừửữự", "u" ],
    [ "ÙÚÛÜŨŪŬŮŰŲǓȔȖỤỦƯỨỪỬỮỰ", "U" ],
    [ "ŵ", "w" ],
    [ "Ŵ", "W" ],
    [ "ýÿŷȳỳỵỷỹ", "y" ],
    [ "ÝŸŶȲỲỴỶỸ", "Y" ],
    [ "źżž", "z" ],
    [ "ŹŻŽ", "Z" ],
    [ "æ", "ae" ],
    [ "Æ", "AE" ],
    [ "œ", "oe" ],
    [ "Œ", "OE" ],
    [ "þ", "th" ],
    [ "Þ", "TH" ],
];

// Look up one entry of the caller-supplied options.
//
//   options   - whatever the caller passed; only a Dict is consulted
//   key       - option name to look up
//   fallback  - value returned when options is not a Dict or lacks key
//
// Returns options.get(key) when the key exists, otherwise fallback. No type
// checking of the value happens here; the typed wrappers do that.
function _option ( options, String key, fallback ) {
    // Type names are strings, so compare with `eq` like every other typeof
    // check in this module.
    if ( typeof options eq "Dict" and options.exists(key) ) {
        return options.get(key);
    }
    return fallback;
}

// Fetch an option that must be a String.
//
// Returns the value stored under key, or fallback when the option is absent.
// Dies with "text/slug: <key> expects String" when the value has another type.
function _string_option ( options, String key, String fallback ) {
    let found := _option( options, key, fallback );
    if ( typeof found eq "String" ) {
        return found;
    }
    die "text/slug: " _ key _ " expects String";
}

// Fetch an option that must be a Boolean.
//
// Returns the value stored under key, or fallback when the option is absent.
// Dies with "text/slug: <key> expects Boolean" when the value has another type.
function _bool_option ( options, String key, Boolean fallback ) {
    let found := _option( options, key, fallback );
    if ( typeof found eq "Boolean" ) {
        return found;
    }
    die "text/slug: " _ key _ " expects Boolean";
}

// Resolve the "separator" option, defaulting to "-".
//
// Dies when the option is not a String (via _string_option) or is empty.
function _separator ( options ) {
    let chosen := _string_option( options, "separator", "-" );
    if ( chosen ne "" ) {
        return chosen;
    }
    die "text/slug: separator must not be empty";
}

// Fold a single character using _ASCII_FOLDS.
//
// Returns the replacement from the first row whose source string contains ch,
// or ch itself when no row matches.
function _fold_char ( String ch ) {
    for ( let entry in _ASCII_FOLDS ) {
        let position := index( entry[0], ch );
        if ( position >= 0 ) {
            return entry[1];
        }
    }
    // No row lists this character: pass it through untouched.
    return ch;
}

// Apply _fold_char to every character of text and return the concatenation.
//
// A replacement may be longer than one character, so the result can be longer
// than the input.
function _ascii_fold ( String text ) {
    let folded := "";
    let pos := 0;
    while ( pos < length text ) {
        let ch := substr( text, pos, 1 );
        folded _= _fold_char(ch);
        pos := pos + 1;
    }
    return folded;
}

// Build the comparison key used to match slug words against stop words.
//
// The word is lower-cased first and folded second. Folding first would leave
// an upper-case accented letter that the fold table does not list (for
// example the first letter of "École") unfolded, and the resulting key could
// never equal an ASCII slug word.
function _word_key ( String word ) {
    return _ascii_fold( lc(word) );
}

// Read the "stop_words" option and return it as an array of comparison keys.
//
// The option defaults to an empty array. Dies when it is not an Array or when
// any element is not a String. Each word is normalised with _word_key.
function _stop_words ( options ) {
    let supplied := _option( options, "stop_words", [] );
    if ( supplied instanceof Array ) {
        let keys := [];
        for ( let entry in supplied ) {
            if ( typeof entry eq "String" ) {
                keys.push( _word_key(entry) );
            } else {
                die "text/slug: stop_words expects Array of String";
            }
        }
        return keys;
    }
    die "text/slug: stop_words expects Array";
}

// Apply the "ascii" and "lowercase" options (both default to true) to text.
//
// When both are enabled the text is lower-cased before it is folded. Folding
// first would miss upper-case accented letters that the fold table does not
// list; they would stay non-ASCII after lower-casing and be dropped as
// separators by slug_words, turning "École" into "cole".
//
// The options are validated in the order "ascii" then "lowercase"; a value of
// the wrong type dies inside _bool_option.
function _source_text ( String text, options ) {
    let fold := _bool_option( options, "ascii", true );
    let lower := _bool_option( options, "lowercase", true );

    let source := text;
    if ( lower ) {
        source := lc(source);
    }
    if ( fold ) {
        source := _ascii_fold(source);
    }
    return source;
}

// Split text into normalised slug words.
//
// The text is prepared by _source_text, every run of characters outside
// A-Z, a-z and 0-9 becomes a single space, and the result is trimmed and
// split on spaces. Words whose key appears in the stop_words option are
// dropped. Returns an empty array when nothing is left after cleaning; in
// that case the stop_words option is not examined.
function slug_words ( String text, options? ) {
    let prepared := _source_text( text, options );
    let spaced := trim( replace( prepared, /[^A-Za-z0-9]+/, " ", "g" ) );
    if ( spaced eq "" ) {
        return [];
    }

    let ignored := _stop_words(options);
    let kept := [];
    for ( let token in split( spaced, " " ) ) {
        let key := _word_key(token);
        if ( not ignored.contains(key) ) {
            kept.push(token);
        }
    }
    return kept;
}

// Join the slug words of text with the configured separator.
//
// When no words remain, the "fallback" option (default "n-a") is returned
// instead and the separator option is not consulted.
function slugify ( String text, options? ) {
    let parts := slug_words( text, options );
    if ( parts.length() > 0 ) {
        return join( _separator(options), parts );
    }
    return _string_option( options, "fallback", "n-a" );
}

// Build "<slug-of-title>.<slug-of-extension>".
//
// The basename falls back to "untitled" when title yields no words. Leading
// dots are stripped from extension before it is slugified with "-" as the
// separator; an extension that yields no words is omitted together with the
// dot.
function filename_slug ( String title, String extension := "" ) {
    let stem := slugify( title, { fallback: "untitled" } );
    let bare := replace( extension, /^\.+/, "" );
    let suffix := slugify( bare, { separator: "-", fallback: "" } );
    if ( suffix eq "" ) {
        return stem;
    }
    return stem _ "." _ suffix;
}

// Report whether candidate is already present in existing.
//
// Dict and PairList are queried with exists(candidate); Array, Bag and Set
// are queried with contains(candidate). Any other type dies.
function _existing_contains ( existing, String candidate ) {
    if ( existing instanceof Dict ) {
        return existing.exists(candidate);
    }
    if ( existing instanceof PairList ) {
        return existing.exists(candidate);
    }
    if (
        existing instanceof Array
        or existing instanceof Bag
        or existing instanceof Set
    ) {
        return existing.contains(candidate);
    }
    die "text/slug: existing expects Array, Bag, Set, Dict, or PairList";
}

// Return a slug for title that is not already present in existing.
//
// The plain slug is tried first. While the current attempt is taken, the next
// attempt is the plain slug followed by the separator and a counter that
// starts at 2 and increases by one each round.
function unique_slug ( existing, String title, options? ) {
    let stem := slugify( title, options );
    let joiner := _separator(options);
    let counter := 2;
    let attempt := stem;

    while ( _existing_contains( existing, attempt ) ) {
        attempt := stem _ joiner _ counter;
        counter := counter + 1;
    }
    return attempt;
}