The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
package PPI::Token::Unknown;

=pod

=head1 NAME

PPI::Token::Unknown - Token of unknown or as-yet undetermined type

=head1 INHERITANCE

  PPI::Token::Unknown
  isa PPI::Token
      isa PPI::Element

=head1 DESCRIPTION

Object of the type C<PPI::Token::Unknown> exist primarily inside the
tokenizer, where they are temporarily brought into existing for a very
short time to represent a token that could be one of a number of types.

Generally, they only exist for a character or two, after which they are
resolved and converted into the correct type. For an object of this type
to survive the parsing process is considered a major bug.

Please report any C<PPI::Token::Unknown> you encounter in a L<PPI::Document>
object as a bug.

=cut

use strict;
use PPI::Token     ();
use PPI::Exception ();

use vars qw{$VERSION @ISA $CURLY_SYMBOL};
BEGIN {
	$VERSION = '1.224';
	@ISA     = 'PPI::Token';
	$CURLY_SYMBOL = qr{\G\^[[:upper:]_]\w+\}};
}





#####################################################################
# Tokenizer Methods

sub __TOKENIZER__on_char {
	my ( $self, $t ) = @_;                                 # Self and Tokenizer
	my $c    = $t->{token}->{content};                     # Current token
	my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Current character

	# Now, we split on the different values of the current content
	if ( $c eq '*' ) {
		# Is it a number?
		if ( $char =~ /\d/ ) {
			# bitwise operator
			$t->{class} = $t->{token}->set_class( 'Operator' );
			return $t->_finalize_token->__TOKENIZER__on_char( $t );
		}

		if ( $char =~ /[\w:]/ ) {
			# Symbol (unless the thing before it is a number
			my ( $prev ) = $t->_previous_significant_tokens(1);
			if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
				$t->{class} = $t->{token}->set_class( 'Symbol' );
				return 1;
			}
		}

		if ( $char eq '{' ) {
			# Get rest of line
			pos $t->{line} = $t->{line_cursor} + 1;
			if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
				# control-character symbol (e.g. *{^_Foo})
				$t->{class} = $t->{token}->set_class( 'Magic' );
				return 1;
			}
		}

		if ( $char eq '*' || $char eq '=' ) {
			# Power operator '**' or mult-assign '*='
			$t->{class} = $t->{token}->set_class( 'Operator' );
			return 1;
		}

		return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);

		$t->{class} = $t->{token}->set_class( 'Operator' );
		return $t->_finalize_token->__TOKENIZER__on_char( $t );



	} elsif ( $c eq '$' ) {
		if ( $char =~ /[a-z_]/i ) {
			# Symbol
			$t->{class} = $t->{token}->set_class( 'Symbol' );
			return 1;
		}

		if ( $PPI::Token::Magic::magic{ $c . $char } ) {
			# Magic variable
			$t->{class} = $t->{token}->set_class( 'Magic' );
			return 1;
		}

		if ( $char eq '{' ) {
			# Get rest of line
			pos $t->{line} = $t->{line_cursor} + 1;
			if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
				# control-character symbol (e.g. ${^MATCH})
				$t->{class} = $t->{token}->set_class( 'Magic' );
				return 1;
			}
		}

		# Must be a cast
		$t->{class} = $t->{token}->set_class( 'Cast' );
		return $t->_finalize_token->__TOKENIZER__on_char( $t );



	} elsif ( $c eq '@' ) {
		if ( $char =~ /[\w:]/ ) {
			# Symbol
			$t->{class} = $t->{token}->set_class( 'Symbol' );
			return 1;
		}

		if ( $PPI::Token::Magic::magic{ $c . $char } ) {
			# Magic variable
			$t->{class} = $t->{token}->set_class( 'Magic' );
			return 1;
		}

		if ( $char eq '{' ) {
			# Get rest of line
			pos $t->{line} = $t->{line_cursor} + 1;
			if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
				# control-character symbol (e.g. @{^_Foo})
				$t->{class} = $t->{token}->set_class( 'Magic' );
				return 1;
			}
		}

		# Must be a cast
		$t->{class} = $t->{token}->set_class( 'Cast' );
		return $t->_finalize_token->__TOKENIZER__on_char( $t );



	} elsif ( $c eq '%' ) {
		# Is it a number?
		if ( $char =~ /\d/ ) {
			# bitwise operator
			$t->{class} = $t->{token}->set_class( 'Operator' );
			return $t->_finalize_token->__TOKENIZER__on_char( $t );
		}

		# Is it a magic variable?
		if ( $char eq '^' || $PPI::Token::Magic::magic{ $c . $char } ) {
			$t->{class} = $t->{token}->set_class( 'Magic' );
			return 1;
		}

		if ( $char =~ /[\w:]/ ) {
			# Symbol (unless the thing before it is a number
			my ( $prev ) = $t->_previous_significant_tokens(1);
			if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
				$t->{class} = $t->{token}->set_class( 'Symbol' );
				return 1;
			}
		}

		if ( $char eq '{' ) {
			# Get rest of line
			pos $t->{line} = $t->{line_cursor} + 1;
			if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) {
				# control-character symbol (e.g. %{^_Foo})
				$t->{class} = $t->{token}->set_class( 'Magic' );
				return 1;
			}
		}

		return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);

		# Probably the mod operator
		$t->{class} = $t->{token}->set_class( 'Operator' );
		return $t->{class}->__TOKENIZER__on_char( $t );



	} elsif ( $c eq '&' ) {
		# Is it a number?
		if ( $char =~ /\d/ ) {
			# bitwise operator
			$t->{class} = $t->{token}->set_class( 'Operator' );
			return $t->_finalize_token->__TOKENIZER__on_char( $t );
		}

		if ( $char =~ /[\w:]/ ) {
			# Symbol (unless the thing before it is a number
			my ( $prev ) = $t->_previous_significant_tokens(1);
			if ( not $prev or not $prev->isa('PPI::Token::Number') ) {
				$t->{class} = $t->{token}->set_class( 'Symbol' );
				return 1;
			}
		}

		return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char);

		# Probably the binary and operator
		$t->{class} = $t->{token}->set_class( 'Operator' );
		return $t->{class}->__TOKENIZER__on_char( $t );



	} elsif ( $c eq '-' ) {
		if ( $char =~ /\d/o ) {
			# Number
			$t->{class} = $t->{token}->set_class( 'Number' );
			return 1;
		}

		if ( $char eq '.' ) {
			# Number::Float
			$t->{class} = $t->{token}->set_class( 'Number::Float' );
			return 1;
		}

		if ( $char =~ /[a-zA-Z]/ ) {
			$t->{class} = $t->{token}->set_class( 'DashedWord' );
			return 1;
		}

		# The numeric negative operator
		$t->{class} = $t->{token}->set_class( 'Operator' );
		return $t->{class}->__TOKENIZER__on_char( $t );



	} elsif ( $c eq ':' ) {
		if ( $char eq ':' ) {
			# ::foo style bareword
			$t->{class} = $t->{token}->set_class( 'Word' );
			return 1;
		}

		# Now, : acts very very differently in different contexts.
		# Mainly, we need to find out if this is a subroutine attribute.
		# We'll leave a hint in the token to indicate that, if it is.
		if ( $self->__TOKENIZER__is_an_attribute( $t ) ) {
			# This : is an attribute indicator
			$t->{class} = $t->{token}->set_class( 'Operator' );
			$t->{token}->{_attribute} = 1;
			return $t->_finalize_token->__TOKENIZER__on_char( $t );
		}

		# It MIGHT be a label, but it's probably the ?: trinary operator
		$t->{class} = $t->{token}->set_class( 'Operator' );
		return $t->{class}->__TOKENIZER__on_char( $t );
	}

	# erm...
	PPI::Exception->throw('Unknown value in PPI::Token::Unknown token');
}

sub _is_cast_or_op {
	my ( $self, $char ) = @_;
	return 1 if $char eq '$';
	return 1 if $char eq '@';
	return 1 if $char eq '%';
	return 1 if $char eq '*';
	return 1 if $char eq '{';
	return;
}

sub _as_cast_or_op {
	my ( $self, $t ) = @_;
	my $class = _cast_or_op( $t );
	$t->{class} = $t->{token}->set_class( $class );
	return $t->_finalize_token->__TOKENIZER__on_char( $t );
}

sub _prev_significant_w_cursor {
	my ( $tokens, $cursor, $extra_check ) = @_;
	while ( $cursor >= 0 ) {
		my $token = $tokens->[ $cursor-- ];
		next if !$token->significant;
		next if $extra_check and !$extra_check->($token);
		return ( $token, $cursor );
	}
	return ( undef, $cursor );
}

# Operator/operand-sensitive, multiple or GLOB cast
sub _cast_or_op {
	my ( $t ) = @_;

	my $tokens = $t->{tokens};
	my $cursor = scalar( @$tokens ) - 1;
	my $token;

	( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
	return 'Cast' if !$token;    # token was first in the document

	if ( $token->isa( 'PPI::Token::Structure' ) and $token->content eq '}' ) {

		# Scan the token stream backwards an arbitrarily long way,
		# looking for the matching opening curly brace.
		my $structure_depth = 1;
		( $token, $cursor ) = _prev_significant_w_cursor(
			$tokens, $cursor,
			sub {
				my ( $token ) = @_;
				return if !$token->isa( 'PPI::Token::Structure' );
				if ( $token eq '}' ) {
					$structure_depth++;
					return;
				}
				if ( $token eq '{' ) {
					$structure_depth--;
					return if $structure_depth;
				}
				return 1;
			}
		);
		return 'Operator' if !$token;    # no matching '{', probably an unbalanced '}'

		# Scan past any whitespace
		( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
		return 'Operator' if !$token;                             # Document began with what must be a hash constructor.
		return 'Operator' if $token->isa( 'PPI::Token::Symbol' ); # subscript

		my %meth_or_subscript_end = map { $_ => 1 } qw@ -> } ] @;
		return 'Operator' if $meth_or_subscript_end{ $token->content };    # subscript

		my $content = $token->content;
		my $produces_or_wants_value =
		  ( $token->isa( 'PPI::Token::Word' ) and ( $content eq 'do' or $content eq 'eval' ) );
		return $produces_or_wants_value ? 'Operator' : 'Cast';
	}

	my %list_start_or_term_end = map { $_ => 1 } qw@ ; ( { [ @;
	return 'Cast'
	  if $token->isa( 'PPI::Token::Structure' ) and $list_start_or_term_end{ $token->content }
	  or $token->isa( 'PPI::Token::Cast' )
	  or $token->isa( 'PPI::Token::Operator' )
	  or $token->isa( 'PPI::Token::Label' );

	return 'Operator' if !$token->isa( 'PPI::Token::Word' );

	( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor );
	return 'Cast' if !$token || $token->content ne '->';

	return 'Operator';
}

# Are we at a location where a ':' would indicate a subroutine attribute
sub __TOKENIZER__is_an_attribute {
	my $t      = $_[1]; # Tokenizer object
	my @tokens = $t->_previous_significant_tokens(3);
	my $p0     = $tokens[0];
	return '' if not $p0;

	# If we just had another attribute, we are also an attribute
	return 1 if $p0->isa('PPI::Token::Attribute');

	# If we just had a prototype, then we are an attribute
	return 1 if $p0->isa('PPI::Token::Prototype');

	# Other than that, we would need to have had a bareword
	return '' unless $p0->isa('PPI::Token::Word');

	# We could be an anonymous subroutine
	if ( $p0->isa('PPI::Token::Word') and $p0->content eq 'sub' ) {
		return 1;
	}

	# Or, we could be a named subroutine
	my $p1 = $tokens[1];
	my $p2 = $tokens[2];
	if (
		$p1
		and
		$p1->isa('PPI::Token::Word')
		and
		$p1->content eq 'sub'
		and (
			not $p2
			or
			$p2->isa('PPI::Token::Structure')
			or (
				$p2->isa('PPI::Token::Whitespace')
				and
				$p2->content eq ''
			)
		)
	) {
		return 1;
	}

	# We aren't an attribute
	'';	
}

1;

=pod

=head1 SUPPORT

See the L<support section|PPI/SUPPORT> in the main module.

=head1 AUTHOR

Adam Kennedy E<lt>adamk@cpan.orgE<gt>

=head1 COPYRIGHT

Copyright 2001 - 2011 Adam Kennedy.

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.

=cut