modules/rdf/cli.zzm

rdf-0.0.1 source code

=encoding utf8

=head1 NAME

rdf/cli - Shared RDF command-line helpers.

=head1 DESCRIPTION

Internal helpers used by the RDF distribution's command-line scripts.

=cut

from rdf/parser/ntriples import NTriplesParser;
from rdf/parser/nquads import NQuadsParser;
from rdf/parser/rdfxml import RdfXmlParser;
from rdf/parser/trig import TriGParser;
from rdf/parser/turtle import TurtleParser;
from rdf/serializer/ntriples import NTriplesSerializer;
from rdf/serializer/nquads import NQuadsSerializer;
from rdf/serializer/rdfxml import RdfXmlSerializer;
from rdf/serializer/trig import TriGSerializer;
from rdf/serializer/turtle import TurtleSerializer;
from rdf/store import RDFStore;
from std/db import DB;
from std/getopt import Getopt;
from std/internals import ansi_esc, load_module;
from std/io import Path, STDERR, STDIN, STDOUT;
from std/string import contains, join, rindex, substr, trim;

=head2 Default class specs

C<RDF_CLI_DEFAULT_STORE> is the C<MODULE.CLASS> spec used for the store
when no C<--store> option is given. C<RDF_CLI_DEFAULT_SERIALIZER> is the
C<MODULE.CLASS> spec used when no serializer option is selected.

=cut

const RDF_CLI_DEFAULT_STORE := "rdf/store.RDFStore";
const RDF_CLI_DEFAULT_SERIALIZER :=
	"rdf/serializer/ntriples.NTriplesSerializer";

=head2 _rdf_cli_parser_shortcuts()

Returns a Dict whose keys are the boolean parser shortcut option names
(C<xml>, C<turtle>, C<trig>, C<ntriples>, C<nquads>) and whose values are
the C<MODULE.CLASS> specs of the corresponding built-in parsers.

=cut

function _rdf_cli_parser_shortcuts () {
	let parser_by_flag := {
		xml: "rdf/parser/rdfxml.RdfXmlParser",
		turtle: "rdf/parser/turtle.TurtleParser",
		trig: "rdf/parser/trig.TriGParser",
		ntriples: "rdf/parser/ntriples.NTriplesParser",
		nquads: "rdf/parser/nquads.NQuadsParser",
	};
	return parser_by_flag;
}

=head2 _rdf_cli_serializer_shortcuts()

Returns a Dict whose keys are the boolean serializer shortcut option names
(C<xml-out>, C<turtle-out>, C<trig-out>, C<ntriples-out>, C<nquads-out>)
and whose values are the C<MODULE.CLASS> specs of the corresponding
built-in serializers.

=cut

function _rdf_cli_serializer_shortcuts () {
	let serializer_by_flag := {
		"xml-out": "rdf/serializer/rdfxml.RdfXmlSerializer",
		"turtle-out": "rdf/serializer/turtle.TurtleSerializer",
		"trig-out": "rdf/serializer/trig.TriGSerializer",
		"ntriples-out": "rdf/serializer/ntriples.NTriplesSerializer",
		"nquads-out": "rdf/serializer/nquads.NQuadsSerializer",
	};
	return serializer_by_flag;
}

=head2 rdf_cli_parse_usage()

Returns the help text for C<parse_rdf.zzs> as a single String ending in a
newline.

The C<--serializer> option is documented as taking C<MODULE.CLASS>, the
same as C<--parser> and C<--store>, because the value is resolved by the
same class loader, which rejects anything that is not C<MODULE.CLASS>.
The description is placed on a continuation line so the help column stays
aligned.

=cut

function rdf_cli_parse_usage () {
	let lines := [
		"Usage: parse_rdf.zzs [options] [file ...]",
		"",
		"Parser options:",
		"  --parser MODULE.CLASS  Load a parser class dynamically",
		"  --xml                  Use the built-in RDF/XML parser",
		"  --turtle               Use the built-in Turtle parser",
		"  --trig                 Use the built-in TriG parser",
		"  --ntriples             Use the built-in N-Triples parser",
		"  --nquads               Use the built-in N-Quads parser",
		"",
		"Store options:",
		"  --store MODULE.CLASS   Store class, default rdf/store.RDFStore",
		"  --dsn DSN              Connect to a database DSN",
		"  --sqlite FILE          Open a SQLite store file",
		"  --backend BACKEND      Store backend label: sqlite, mysql, postgresql",
		"",
		"Output options:",
		"  --stdout               Serialize the store to STDOUT after parsing",
		"  --output FILE          Serialize the store to FILE after parsing",
		"  --serializer MODULE.CLASS",
		"                         Load a serializer class dynamically",
		"  --xml-out              Serialize as RDF/XML",
		"  --turtle-out           Serialize as Turtle",
		"  --trig-out             Serialize as TriG",
		"  --ntriples-out         Serialize as N-Triples",
		"  --nquads-out           Serialize as N-Quads",
		"  -h, --help             Show this help",
	];
	return join( "\n", lines ) _ "\n";
}

=head2 rdf_cli_serialize_usage()

Returns the help text for C<serialize_rdf.zzs> as a single String ending
in a newline.

The C<--serializer> option is documented as taking C<MODULE.CLASS>, the
same as C<--store>, because the value is resolved by the same class
loader, which rejects anything that is not C<MODULE.CLASS>. The
description is placed on a continuation line so the help column stays
aligned.

=cut

function rdf_cli_serialize_usage () {
	let lines := [
		"Usage: serialize_rdf.zzs [options]",
		"",
		"Store options:",
		"  --store MODULE.CLASS   Store class, default rdf/store.RDFStore",
		"  --dsn DSN              Connect to a database DSN",
		"  --sqlite FILE          Open a SQLite store file",
		"  --backend BACKEND      Store backend label: sqlite, mysql, postgresql",
		"",
		"Output options:",
		"  --stdout               Serialize the store to STDOUT",
		"  --output FILE          Serialize the store to FILE",
		"  --serializer MODULE.CLASS",
		"                         Load a serializer class dynamically",
		"  --xml-out              Serialize as RDF/XML",
		"  --turtle-out           Serialize as Turtle",
		"  --trig-out             Serialize as TriG",
		"  --ntriples-out         Serialize as N-Triples",
		"  --nquads-out           Serialize as N-Quads",
		"  -h, --help             Show this help",
	];
	return join( "\n", lines ) _ "\n";
}

=head2 _rdf_cli_specs()

Returns the Array of option specs passed to C<Getopt.parse> by both
command-line entry points. Specs ending in C<=s> are the options that the
usage text shows with a value; the rest are shown as plain flags.

=cut

function _rdf_cli_specs () {
	let specs := [
		"help|h",
		"parser=s", "xml", "turtle", "trig", "ntriples", "nquads",
		"store=s", "dsn=s", "sqlite=s", "backend=s",
		"stdout", "output=s",
		"serializer=s",
		"xml-out", "turtle-out", "trig-out", "ntriples-out", "nquads-out",
	];
	return specs;
}

=head2 _rdf_cli_load_class( spec )

Splits C<spec> at its last C<.> into a module part and a class part and
passes both to C<load_module>, returning whatever C<load_module> returns.

Dies with C<rdf: class spec must be MODULE.CLASS: ...> when the position
reported by C<rindex> is zero or less, or is at or beyond the end-of-spec
bound.

=cut

function _rdf_cli_load_class ( String spec ) {
	let split_at := rindex( spec, "." );
	if ( split_at <= 0 or split_at >= length spec - 1 ) {
		die "rdf: class spec must be MODULE.CLASS: " _ spec;
	}
	let module_name := substr( spec, 0, split_at );
	let class_name := substr( spec, split_at + 1 );
	return load_module( module_name, class_name );
}

=head2 _rdf_cli_selected_spec( opts, explicit, shortcuts, label, default_spec )

Works out which class spec the options select.

The value of the C<explicit> option (stringified) is a candidate when it
is not null, and so is the spec of every key in C<shortcuts> whose option
is true in C<opts>. Returns the single candidate, or C<default_spec> when
there is none. Dies with C<rdf: choose only one LABEL> when more than one
candidate is present.

=cut

function _rdf_cli_selected_spec (
	Dict opts,
	String explicit,
	Dict shortcuts,
	String label,
	default_spec := null,
) {
	let chosen := [];
	let explicit_value := opts.get(explicit);
	if ( explicit_value != null ) {
		chosen.push( "" _ explicit_value );
	}
	for ( let flag in shortcuts.keys() ) {
		if ( opts.get(flag) ) {
			chosen.push( shortcuts.get(flag) );
		}
	}

	let how_many := chosen.length();
	if ( how_many > 1 ) {
		die "rdf: choose only one " _ label;
	}
	if ( how_many == 0 ) {
		return default_spec;
	}
	return chosen[0];
}

=head2 rdf_cli_parser_spec( opts )

Returns the parser class spec selected by C<--parser> or one of the parser
shortcut flags, or null when no parser option was given. Dies when more
than one parser option is present.

=cut

function rdf_cli_parser_spec ( Dict opts ) {
	let shortcuts := _rdf_cli_parser_shortcuts();
	let spec := _rdf_cli_selected_spec( opts, "parser", shortcuts, "parser", null );
	return spec;
}

=head2 rdf_cli_serializer_spec( opts )

Returns the serializer class spec selected by C<--serializer> or one of
the C<*-out> shortcut flags, falling back to C<RDF_CLI_DEFAULT_SERIALIZER>
when no serializer option was given. Dies when more than one serializer
option is present.

=cut

function rdf_cli_serializer_spec ( Dict opts ) {
	let shortcuts := _rdf_cli_serializer_shortcuts();
	let spec := _rdf_cli_selected_spec(
		opts,
		"serializer",
		shortcuts,
		"serializer",
		RDF_CLI_DEFAULT_SERIALIZER,
	);
	return spec;
}

=head2 rdf_cli_new_parser( spec )

Loads the class named by the C<MODULE.CLASS> string C<spec> and returns a
new instance of it, constructed with no arguments.

=cut

function rdf_cli_new_parser ( String spec ) {
	let parser_class := _rdf_cli_load_class(spec);
	let parser := new parser_class();
	return parser;
}

=head2 rdf_cli_new_serializer( spec )

Loads the class named by the C<MODULE.CLASS> string C<spec> and returns a
new instance of it, constructed with no arguments.

=cut

function rdf_cli_new_serializer ( String spec ) {
	let serializer_class := _rdf_cli_load_class(spec);
	let serializer := new serializer_class();
	return serializer;
}

=head2 rdf_cli_make_store( opts, for_loading )

Builds the store object described by the C<store>, C<dsn>, C<sqlite> and
C<backend> options and returns it.

The store class comes from C<--store>, or C<RDF_CLI_DEFAULT_STORE> when
that option is null. The database handle comes from C<DB.connect> for
C<--dsn>, C<DB.open> for C<--sqlite>, and C<DB.temp> when neither is
given. The C<backend> argument is passed to the constructor only when
C<--backend> was supplied.

C<install_schema> is called on the store when C<for_loading> is true or
when neither C<--dsn> nor C<--sqlite> was given; otherwise
C<verify_schema> is called.

Dies with C<rdf: --dsn and --sqlite cannot be combined> when both options
are present.

=cut

function rdf_cli_make_store ( Dict opts, Boolean for_loading ) {
	let has_dsn := opts{dsn} != null;
	let has_sqlite := opts{sqlite} != null;
	if ( has_dsn and has_sqlite ) {
		die "rdf: --dsn and --sqlite cannot be combined";
	}

	let spec := opts{store} != null
		? "" _ opts{store}
		: RDF_CLI_DEFAULT_STORE;
	let store_class := _rdf_cli_load_class(spec);

	let handle := has_dsn
		? DB.connect( "" _ opts{dsn} )
		: has_sqlite
			? DB.open( new Path( "" _ opts{sqlite} ) )
			: DB.temp();

	let store := opts{backend} != null
		? new store_class( dbh: handle, backend: "" _ opts{backend} )
		: new store_class(dbh: handle);

	let existing_database := has_dsn or has_sqlite;
	if ( existing_database and ( not for_loading ) ) {
		store.verify_schema();
	}
	else {
		store.install_schema();
	}
	return store;
}

=head2 rdf_cli_read_stdin()

Calls C<STDIN.next_line> repeatedly until it returns null and returns all
the lines read, concatenated with no separator, as one String.

=cut

function rdf_cli_read_stdin () {
	let lines_read := [];
	while ( true ) {
		let current := STDIN.next_line();
		last if current == null;
		lines_read.push(current);
	}
	let everything := join( "", lines_read );
	return everything;
}

=head2 rdf_cli_sniff_parser_spec( text )

Guesses a parser class spec from the content of C<text>. Only the first
4096 characters (trimmed) are inspected.

Returns the RDF/XML parser spec when the sample starts with an XML
declaration, starts with or contains an C<< <rdf:RDF >> tag, or mentions
the RDF syntax namespace together with an C<xmlns> declaration. In every
other case the Turtle parser spec is returned.

The namespace test requires C<xmlns> and not merely a C<< < >> character.
Turtle and N-Triples wrap every IRI in angle brackets and routinely
mention C<rdf-syntax-ns#> (for example in C<rdf:type>), so a bare C<< < >>
would send such documents to the RDF/XML parser.

=cut

function rdf_cli_sniff_parser_spec ( String text ) {
	let sample := trim( substr( text, 0, length text > 4096 ? 4096 : length text ) );
	if (
		sample ~ /^<\?xml\b/i or
		sample ~ /^<rdf:RDF\b/i or
		contains( sample, "<rdf:RDF" ) or
		( contains( sample, "rdf-syntax-ns#" ) and contains( sample, "xmlns" ) )
	) {
		return "rdf/parser/rdfxml.RdfXmlParser";
	}
	return "rdf/parser/turtle.TurtleParser";
}

=head2 rdf_cli_parse_input( store, parser_spec, label, text )

Parses C<text> and adds the result to C<store> with C<add_quads_bulk>,
returning the number of items the parser produced.

When C<parser_spec> is null the parser is chosen by sniffing C<text>.
C<label> is used only in the error message. Dies with
C<rdf: parser did not return an Array for LABEL> when C<parse_string>
returns something that is not an Array.

=cut

function rdf_cli_parse_input ( store, parser_spec, String label, String text ) {
	let chosen_spec := parser_spec != null
		? parser_spec
		: rdf_cli_sniff_parser_spec(text);
	let parsed := rdf_cli_new_parser(chosen_spec).parse_string(text);
	if ( not ( parsed instanceof Array ) ) {
		die "rdf: parser did not return an Array for " _ label;
	}
	store.add_quads_bulk(parsed);
	return parsed.length();
}

=head2 rdf_cli_emit_counts( counts )

Writes one line to STDERR for each row of C<counts>, built from the row's
C<label> and C<count> entries. Each line is wrapped in the escape
sequences C<[36m> and C<[0m>, each prefixed with C<ansi_esc()>.

=cut

function rdf_cli_emit_counts ( Array counts ) {
	let colour_on := ansi_esc() _ "[36m";
	let colour_off := ansi_esc() _ "[0m";
	for ( let entry in counts ) {
		let message := entry{label} _ ": " _ entry{count} _ " triples/quads read";
		STDERR.say( colour_on _ message _ colour_off );
	}
}

=head2 rdf_cli_write_output( text, output )

Prints C<text> to STDOUT when C<output> is null. Otherwise C<output> is
stringified, used as a file path, and C<text> is written to that file
with C<spew_utf8>.

=cut

function rdf_cli_write_output ( String text, output ) {
	if ( output == null ) {
		STDOUT.print(text);
	}
	else {
		let target := new Path( "" _ output );
		target.spew_utf8(text);
	}
}

=head2 rdf_cli_dump_store( store, opts )

Creates the serializer selected by C<opts>, passes it the result of
C<store.find()>, and returns what the serializer's C<serialize> method
returns.

=cut

function rdf_cli_dump_store ( store, Dict opts ) {
	let spec := rdf_cli_serializer_spec(opts);
	let serializer := rdf_cli_new_serializer(spec);
	let contents := store.find();
	return serializer.serialize(contents);
}

=head2 _rdf_cli_validate_output_options( opts )

Dies with C<rdf: --stdout and --output cannot be combined> when the
C<stdout> option is true and the C<output> option is not null. Otherwise
does nothing.

=cut

function _rdf_cli_validate_output_options ( Dict opts ) {
	if ( opts{stdout} and opts{output} != null ) {
		die "rdf: --stdout and --output cannot be combined";
	}
}

=head2 rdf_cli_parse_main( argv )

Entry point for the parsing command. Returns the exit status.

Returns 2 after printing the error and the usage text to STDERR when the
options cannot be parsed. Returns 0 after printing the usage text to
STDOUT when C<--help> is given.

Otherwise a store is created for loading and every input is parsed into
it. With no file arguments the input is read from STDIN. A lone C<-> also
means STDIN, and C<-> combined with other arguments is an error. When
C<--stdout> or C<--output> is given, the store is then serialized and
written out. Finally the per-input counts are reported on STDERR and 0 is
returned.

=cut

function rdf_cli_parse_main ( argv ) {
	let parsed := Getopt.parse( argv, _rdf_cli_specs() );
	if ( not parsed{ok} ) {
		STDERR.say(parsed{error});
		STDERR.print(rdf_cli_parse_usage());
		return 2;
	}

	let opts := parsed{options};
	if ( opts{help} ) {
		STDOUT.print(rdf_cli_parse_usage());
		return 0;
	}
	_rdf_cli_validate_output_options(opts);

	let parser_spec := rdf_cli_parser_spec(opts);
	let store := rdf_cli_make_store( opts, true );
	let inputs := parsed{argv};
	let counts := [];

	if ( inputs.length() == 0 ) {
		let text := rdf_cli_read_stdin();
		let quads_read := rdf_cli_parse_input( store, parser_spec, "<stdin>", text );
		counts.push({ label: "<stdin>", count: quads_read });
	}
	else {
		if ( "-" in inputs and inputs.length() > 1 ) {
			die "rdf: - can only be used by itself";
		}
		for ( let file in inputs ) {
			let name := "" _ file;
			let from_stdin := name eq "-";
			let shown := from_stdin ? "<stdin>" : name;
			let text := from_stdin
				? rdf_cli_read_stdin()
				: ( new Path(name) ).slurp_utf8();
			let quads_read := rdf_cli_parse_input( store, parser_spec, shown, text );
			counts.push({ label: shown, count: quads_read });
		}
	}

	let wants_output := opts{stdout} or opts{output} != null;
	if ( wants_output ) {
		let dumped := rdf_cli_dump_store( store, opts );
		rdf_cli_write_output( dumped, opts{output} );
	}
	rdf_cli_emit_counts(counts);
	return 0;
}

=head2 rdf_cli_serialize_main( argv )

Entry point for the serializing command. Returns the exit status.

Returns 2 after printing the error and the usage text to STDERR when the
options cannot be parsed. Returns 0 after printing the usage text to
STDOUT when C<--help> is given.

Dies when positional arguments are present, when any parser option is
given, or when C<--stdout> and C<--output> are combined. Otherwise a store
is created (not for loading), serialized, and written to the C<--output>
file or to STDOUT, and 0 is returned.

=cut

function rdf_cli_serialize_main ( argv ) {
	let parsed := Getopt.parse( argv, _rdf_cli_specs() );
	if ( not parsed{ok} ) {
		STDERR.say(parsed{error});
		STDERR.print(rdf_cli_serialize_usage());
		return 2;
	}

	let opts := parsed{options};
	if ( opts{help} ) {
		STDOUT.print(rdf_cli_serialize_usage());
		return 0;
	}

	if ( parsed{argv}.length() > 0 ) {
		die "rdf: serialize_rdf.zzs does not accept input files";
	}
	if ( rdf_cli_parser_spec(opts) != null ) {
		die "rdf: parser options are not accepted by serialize_rdf.zzs";
	}
	_rdf_cli_validate_output_options(opts);

	let store := rdf_cli_make_store( opts, false );
	let dumped := rdf_cli_dump_store( store, opts );
	rdf_cli_write_output( dumped, opts{output} );
	return 0;
}