The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package JSON::Schema::ToJSON;

use strict;
use warnings;

use Mojo::Base -base;
use Cpanel::JSON::XS;
use JSON::Validator;
use String::Random;
use DateTime;
use Hash::Merge qw/ merge /;

our $VERSION = '0.11';

has _validator  => sub { JSON::Validator->new };
has _str_rand   => sub { String::Random->new };
has _depth      => sub { 0 };

has max_depth   => sub { 10 };
has example_key => sub { 0 };

sub json_schema_to_json {
	my ( $self,%args ) = @_;

	my $schema = $args{schema}; # an already parsed JSON schema

	if ( ! $schema ) {
		$schema = $args{schema_str} # an unparsed JSON schema
			|| die "json_schema_to_json needs schema or schema_str arg";

		eval { $schema = decode_json( $schema ); }
		or do { die "json_schema_to_json failed to parse schema: $@" };
	}

	$self->example_key( $args{example_key} ) if $args{example_key};

	$self->_validator->schema( $schema );
	$schema = $self->_validator->schema->data;

	$self->_depth( $self->_depth + 1 );
	my ( $method,$sub_schema ) = $self->_guess_method( $schema );
	$self->_depth( $self->_depth - 1 ) if $self->_depth;

	return $self->$method( $sub_schema );
}

sub _example_from_spec {
	my ( $self,$schema ) = @_;

	# spec/schema can contain examples that we could use as mock data

	return $schema->{ $self->example_key } # OpenAPI specific
		if $self->example_key && $schema->{ $self->example_key };

	return ();
}

sub _random_boolean {
	my ( $self,$schema ) = @_;

	return $self->_example_from_spec( $schema )
		if scalar $self->_example_from_spec( $schema );

	return rand > 0.5
		? Cpanel::JSON::XS::true
		: Cpanel::JSON::XS::false
}

sub _random_integer {
	my ( $self,$schema ) = @_;

	return $self->_example_from_spec( $schema )
		if scalar $self->_example_from_spec( $schema );

	my $min = $schema->{minimum};
	my $max = $schema->{maximum};
	my $mof = $schema->{multipleOf};

	# by default the min/max values are exclusive
	$min++ if defined $min && $schema->{exclusiveMinimum};
	$max-- if defined $max && $schema->{exclusiveMaximum};

	my @possible_values = defined $min && defined $max
		? $min .. $max
		: defined $min
			? $min .. $min + 1000
			: defined $max
				? 1 .. $max
				: defined $mof
					? $mof .. $mof
					: 1 .. 1000 # short range, prevent creation of a massive array
	;

	# if we have multipleOf just return the first value that fits. note that
	# there is a possible bug here and the JSON schema spec isn't clear about
	# it - it's possible to have a multipleOf that would never be possible
	# given certain minimum and maximum (e.g. 1 .. 3, multiple of 4)
	if ( $mof ) {
		shift( @possible_values ) until (
			! @possible_values
			|| $possible_values[0] % $mof == 0
		);
		return $possible_values[0];
	} else {
		return $self->_random_element( [ @possible_values ] );
	}
}

sub _random_number {
	my ( $self,$schema ) = @_;

	return $self->_example_from_spec( $schema )
		if scalar $self->_example_from_spec( $schema );

	return $self->_random_integer( $schema )
		if ( $schema->{multipleOf} );

	return $self->_random_integer( $schema ) + $self->_random_integer( $schema ) / 10;
}

sub _random_string {
	my ( $self,$schema ) = @_;

	return $self->_example_from_spec( $schema )
		if scalar $self->_example_from_spec( $schema );

	if ( my @enum = @{ $schema->{enum} // [] } ) {
		return $self->_random_element( [ @enum ] );
	}

	return $self->_str_rand->randregex( $schema->{pattern} )
		if $schema->{pattern};

	if ( my $format = $schema->{format} ) {
		return {
			"date-time" => DateTime->now->subtract(
				weeks => $self->_random_integer({ minimum => 1, maximum => 500 }),
				days => $self->_random_integer({ minimum => 1 }),
				hours => $self->_random_integer({ minimum => 1 }),
				minutes => $self->_random_integer({ minimum => 1 }),
				seconds => $self->_random_integer({ minimum => 1 }),
			)->iso8601 . '.000Z',
			"email"     =>
				$self->_random_string( { pattern => '[A-Za-z]{12}' } )
				. '@'
				. $self->_random_string( { pattern => '[A-Za-z]{12}' } )
				. '.com',
			"hostname"  => $self->_random_string( { pattern => '[A-Za-z]{12}' } ),
			"ipv4"      => join( '.',map {  $self->_random_integer({
				minimum => 1,
				maximum => 254,
			}) } 1 .. 4 ),
			"ipv6"      => '2001:0db8:0000:0000:0000:0000:1428:57ab',
			"uri"       => 'https://www.'
				. $self->_random_string( { pattern => '[a-z]{12}' } )
				. '.com',
			"uriref"    => 'https://www.'
				. $self->_random_string( { pattern => '[a-z]{12}' } )
				. '.com',
		}->{ $format };
	}

	my $min = $schema->{minLength}
		|| ( $schema->{maxLength} ? $schema->{maxLength} - 1 : 10 );

	my $max = $schema->{maxLength}
		|| ( $schema->{minLength} ? $schema->{minLength} + 1 : 50 );

	return $self->_str_rand->randpattern(
		'.' x $self->_random_integer( { minimum => $min, maximum => $max } ),
	);
}

sub _random_array {
	my ( $self,$schema ) = @_;

	my $unique = $schema->{uniqueItems};

	my $length = $self->_random_integer({
		minimum => $schema->{minItems}
			|| ( $schema->{maxItems} ? $schema->{maxItems} - 1 : 1 ),
		maximum => $schema->{maxItems}
			|| ( $schema->{minItems} ? $schema->{minItems} + 1 : 5 )
	});

	if ( $self->_depth >= $self->max_depth ) {
		warn __PACKAGE__
			. " hit max depth (@{[ $self->max_depth ]}) in _random_array";
		return [ 1 .. $length ];
	} else {
		$self->_depth( $self->_depth + 1 );
	}

	my @return_items;

	if ( my $items = $schema->{items} ) {

		$self->_depth( $self->_depth + 1 );

		if ( ref( $items ) eq 'ARRAY' ) {

			ADD_ITEM: foreach my $item ( @{ $items } ) {
				last ADD_ITEM if ( $self->_depth >= $self->max_depth );
				$self->_add_next_array_item( \@return_items,$item,$unique )
					|| redo ADD_ITEM; # possible halting problem
			}

		} else {

			ADD_ITEM: foreach my $i ( 1 .. $length ) {
				last ADD_ITEM if ( $self->_depth >= $self->max_depth );
				$self->_add_next_array_item( \@return_items,$items,$unique )
					|| redo ADD_ITEM; # possible halting problem
			}

		}

	} else {
		@return_items = 1 .. $length;
	}

	$self->_depth( $self->_depth - 1 ) if $self->_depth;
	return [ @return_items ];
}

sub _add_next_array_item {
	my ( $self,$array,$schema,$unique ) = @_;

	if ( $self->_depth >= $self->max_depth ) {
		warn __PACKAGE__
			. " hit max depth (@{[ $self->max_depth ]}) in _add_next_array_item";
		push( @{ $array },undef );
		return 1;
	}

	$self->_depth( $self->_depth + 1 );
	my ( $method,$sub_schema ) = $self->_guess_method( $schema );
	my $value = $self->$method( $sub_schema );
	$self->_depth( $self->_depth - 1 ) if $self->_depth;

	if ( ! $unique ) {
		push( @{ $array },$value );
		return 1;
	}

	# unique requires us to check all existing elements of the array and only
	# add the new value if it doesn't already exist
	my %existing = map { $_ => 1 } @{ $array };

	if ( ! $existing{$value} ) {
		push( @{ $array },$value );
		return 1;
	}

	return 0;
}

sub _random_object {
	my ( $self,$schema ) = @_;

	my $object = {};
	my $required;
	my %properties = map { $_ => 1 } keys( %{ $schema->{properties} } );

	if ( $required = $schema->{required} ) {
		# we have a list of required properties, just use those
		%properties = map { $_ => 1 } @{ $required };
	}

	# check max/min properties requirements
	my $min = $schema->{minProperties}
		|| ( $schema->{maxProperties} ? $schema->{maxProperties} - 1 : undef );

	my $max = $schema->{maxProperties}
		|| ( $schema->{minProperties} ? $schema->{minProperties} + 1 : undef );

	if ( ! $min && ! $max ) {
		# no min or max, just make use of all properties
		%properties = map { $_ => 1 } keys( %{ $schema->{properties} } );
	}

	if ( $min && scalar( keys( %properties ) ) < $min ) {
		# we have too few properties
		if ( $max ) {
			# add more properties until we have enough
			MAX_PROP: foreach my $property ( keys( %{ $schema->{properties} } ) ) {
				$properties{$property} = 1;
				last MAX_PROP if scalar( keys( %properties ) ) == $min;
			}
		} else {
			# no max, just make use of all properties
			%properties = map { $_ => 1 } keys( %{ $schema->{properties} } );
		}
	}

	if ( $max && scalar( keys( %properties ) ) > $max ) {
		# we have too many properties, delete some (except those required)
		# until we are below the max permitted amount
		MIN_PROP: foreach my $property ( keys( %{ $schema->{properties} } ) ) {

			delete( $properties{$property} ) if (
				# we can delete, we don't have any required properties
				! $required

				# or this property is not amongst the list of required properties
				|| ! grep { $_ eq $property } @{ $required }
			);

			last MIN_PROP if scalar( keys( %properties ) ) <= $max;
		}
	}

	if ( $self->_depth >= $self->max_depth ) {
		warn __PACKAGE__
			. " hit max depth (@{[ $self->max_depth ]}) in _random_object";
		return {};
	} else {
		$self->_depth( $self->_depth + 1 );
	}

	PROPERTY: foreach my $property ( keys %properties ) {

		$self->_depth( $self->_depth + 1 );

		last PROPERTY if ( $self->_depth >= $self->max_depth );

		my ( $method,$sub_schema )
			= $self->_guess_method( $schema->{properties}{$property} );

		$object->{$property} = $self->$method( $sub_schema );
	}

	$self->_depth( $self->_depth - 1 ) if $self->_depth;

	return $object;
}

sub _random_null { undef }

sub _random_enum {
	my ( $self,$schema ) = @_;
	return $self->_random_element( $schema->{'enum'} );
}

sub _guess_method {
	my ( $self,$schema ) = @_;

	if (
		$schema->{'type'}
		&& ref( $schema->{'type'} ) eq 'ARRAY'
	) {
		$schema->{'type'} = $self->_random_element( $schema->{'type'} );
	}

	# check for combining schemas
	if ( my $any_of = $schema->{'anyOf'} ) {

		# easy, pick a random sub schema
		my $sub_schema = $self->_random_element( $any_of );
		return $self->_guess_method( $sub_schema );

	} elsif ( my $all_of = $schema->{'allOf'} ) {

		# easy? mush these all together and assume the schema doesn't
		# contain any contradictory information. note the mushing
		# together needs to be a little bit smart to prevent stomping
		# on any duplicate keys (hence Hash::Merge)
		my $merged_schema = {};

		foreach my $sub_schema ( @{ $all_of } ) {
			$merged_schema = merge( $merged_schema,$sub_schema );
		}

		return $self->_guess_method( $merged_schema );

	} elsif ( my $one_of = $schema->{'oneOf'} ) {

		# difficult - we need to generate data that validates against
		# one and *only* one of the rules, so here we make a poor
		# attempt and just go by the first rule
		warn __PACKAGE__ . " encountered oneOf, see CAVEATS perldoc section";
		return $self->_guess_method( $one_of->[0] );

	} elsif ( my $not = $schema->{'not'} ) {

		if ( my $not_type = $not->{'type'} ) {

			my $type = {
				"string"  => "integer",
				"integer" => "string",
				"number"  => "string",
				"enum"    => "string",
				"boolean" => "string",
				"null"    => "integer",
				"object"  => "string",
				"array"   => "object",
			}->{ $not_type };

			return $self->_guess_method( { type => $type } );
		}

		# well i don't like this, because by implication it means
		# the data can be anything but the listed one so it seems
		# very handwavy in some cases.
		warn __PACKAGE__ . " encountered not, see CAVEATS perldoc section";
	}

	# danger danger! accessing private method from elsewhere
	my $schema_type = JSON::Validator::_guess_schema_type( $schema );

	$schema_type //= 'null';

	$self->_depth( $self->_depth - 1 ) if $self->_depth;
	return ( "_random_$schema_type",$schema );
}

sub _random_element {
	my ( $self,$list ) = @_;
	return $list->[ int( rand( scalar( @{ $list } ) ) ) ];
}

=encoding utf8

=head1 NAME

JSON::Schema::ToJSON - Generate example JSON structures from JSON Schema definitions

=head1 VERSION

0.11

=head1 SYNOPSIS

    use JSON::Schema::ToJSON;

    my $to_json  = JSON::Schema::ToJSON->new(
        example_key => undef, # set to a key to take example from
        max_depth   => 10,    # increase if you have very deep data structures
    );

    my $perl_string_hash_or_arrayref = $to_json->json_schema_to_json(
        schema     => $already_parsed_json_schema,  # either this
        schema_str => '{ "type" : "boolean" }',     # or this
    );

=head1 DESCRIPTION

L<JSON::Schema::ToJSON> is a class for generating "fake" or "example" JSON data
structures from JSON Schema structures.

=head1 CONSTRUCTOR ARGUMENTS

=head2 example_key

The key that will be used to find example data for use in the returned structure. In
the case of the following schema:

    {
        "type" : "object",
        "properties" : {
            "id" : {
                "type" : "string",
                "description" : "ID of the payment.",
                "x-example" : "123ABC"
            }
        }
    }

Setting example_key to C<x-example> will make the generator return the content of
the C<"x-example"> (123ABC) rather than a random string/int/etc. This is more so
for things like OpenAPI specifications.

You can set this to any key you like, although be careful as you could end up with
invalid data being used (for example an integer field and then using the description
key as the content would not be sensible or valid).

=head2 max_depth

To prevent deep recursion due to circular references in JSON schemas the module has
a default max depth set to a very conservative level of 10. If you need to go deeper
than this then pass a larger value at object construction.

=head1 METHODS

=head2 json_schema_to_json

    my $perl_string_hash_or_arrayref = $to_json->json_schema_to_json(
        schema     => $already_parsed_json_schema,  # either this
        schema_str => '{ "type" : "boolean" }',     # or this
    );

Returns a randomly generated representative data structure that corresponds to the
passed JSON schema. Can take either an already parsed JSON Schema or the raw JSON
Schema string.

=head1 CAVEATS

Caveats? The implementation is incomplete as using some of the more edge case JSON
schema validation options may not generate representative JSON so they will not
validate against the schema on a round trip. These include:

=over 4

=item * additionalItems

This is ignored

=item * additionalProperties and patternProperties

These are also ignored

=item * dependencies

This is *also* ignored, possible result of invalid JSON if used

=item * oneOf

Only the *first* schema from the oneOf list will be used (which means
that the data returned may be invalid against others in the list)

=item * not

Currently any not restrictions are ignored as these can be very hand wavy
but we will try a "best guess" in the case of "not" : { "type" : ... }

=back

In the case of oneOf and not the module will raise a warning to let you know that
potentially invalid JSON has been generated. If you're using this module then you
probably want to avoid oneOf and not in your schemas.

It is also entirely possible to pass a schema that could never be validated, but
will result in a generated structure anyway, example: an integer that has a "minimum"
value of 2, "maximum" value of 4, and must be a "multipleOf" 5 - a nonsensical
combination.

Note that the data generated is completely random, don't expect it to be the same
across runs or calls. The data is also meaningless in terms of what it represents
such that an object property of "name" that is a string will be generated as, for
example, "kj02@#fjs01je#$42wfjs" - The JSON generated is so you have a representative
B<structure>, not representative B<data>. Set example keys in your schema and then
set the C<example_key> in the constructor if you want this to be repeatable and/or
more representative.

=head1 LICENSE

This library is free software; you can redistribute it and/or modify it under
the same terms as Perl itself. If you would like to contribute documentation,
features, bug fixes, or anything else then please raise an issue / pull request:

    https://github.com/Humanstate/json-schema-tojson

=head1 AUTHOR

Lee Johnson - C<leejo@cpan.org>

=cut

1;

# vim:noet:sw=4:ts=4