tokenizer/composer.json000064400000001062150247712330011303 0ustar00{ "name": "theseer/tokenizer", "description": "A small library for converting tokenized PHP source code into XML and potentially other formats", "license": "BSD-3-Clause", "authors": [ { "name": "Arne Blankerts", "email": "arne@blankerts.de", "role": "Developer" } ], "support": { "issues": "https://github.com/theseer/tokenizer/issues" }, "require": { "php": "^7.2 || ^8.0", "ext-xmlwriter": "*", "ext-dom": "*", "ext-tokenizer": "*" }, "autoload": { "classmap": [ "src/" ] } } tokenizer/CHANGELOG.md000064400000003123150247712330010372 0ustar00# Changelog All notable changes to Tokenizer are documented in this file using the [Keep a CHANGELOG](http://keepachangelog.com/) principles. ## [1.2.1] - 2021-07-28 ### Fixed * [#13](https://github.com/theseer/tokenizer/issues/13): Fatal error when tokenizing files that contain only a single empty line ## [1.2.0] - 2020-07-13 This release is now PHP 8.0 compliant. ### Fixed * Whitespace handling in general (only noticable in the intermediate `TokenCollection`) is now consitent ### Changed * Updated `Tokenizer` to deal with changed whitespace handling in PHP 8.0 The XMLSerializer was unaffected. ## [1.1.3] - 2019-06-14 ### Changed * Ensure XMLSerializer can deal with empty token collections ### Fixed * [#2](https://github.com/theseer/tokenizer/issues/2): Fatal error in infection / phpunit ## [1.1.2] - 2019-04-04 ### Changed * Reverted PHPUnit 8 test update to stay PHP 7.0 compliant ## [1.1.1] - 2019-04-03 ### Fixed * [#1](https://github.com/theseer/tokenizer/issues/1): Empty file causes invalid array read ### Changed * Tests should now be PHPUnit 8 compliant ## [1.1.0] - 2017-04-07 ### Added * Allow use of custom namespace for XML serialization ## [1.0.0] - 2017-04-05 Initial Release [1.1.3]: https://github.com/theseer/tokenizer/compare/1.1.2...1.1.3 [1.1.2]: https://github.com/theseer/tokenizer/compare/1.1.1...1.1.2 [1.1.1]: https://github.com/theseer/tokenizer/compare/1.1.0...1.1.1 [1.1.0]: https://github.com/theseer/tokenizer/compare/1.0.0...1.1.0 [1.0.0]: https://github.com/theseer/tokenizer/compare/b2493e57de80c1b7414219b28503fa5c6b4d0a98...1.0.0 tokenizer/src/Tokenizer.php000064400000006555150247712330012047 0ustar00 'T_OPEN_BRACKET', ')' => 'T_CLOSE_BRACKET', '[' => 'T_OPEN_SQUARE', ']' => 'T_CLOSE_SQUARE', '{' => 'T_OPEN_CURLY', '}' => 'T_CLOSE_CURLY', ';' => 'T_SEMICOLON', '.' => 'T_DOT', ',' => 'T_COMMA', '=' => 'T_EQUAL', '<' => 'T_LT', '>' => 'T_GT', '+' => 'T_PLUS', '-' => 'T_MINUS', '*' => 'T_MULT', '/' => 'T_DIV', '?' => 'T_QUESTION_MARK', '!' => 'T_EXCLAMATION_MARK', ':' => 'T_COLON', '"' => 'T_DOUBLE_QUOTES', '@' => 'T_AT', '&' => 'T_AMPERSAND', '%' => 'T_PERCENT', '|' => 'T_PIPE', '$' => 'T_DOLLAR', '^' => 'T_CARET', '~' => 'T_TILDE', '`' => 'T_BACKTICK' ]; public function parse(string $source): TokenCollection { $result = new TokenCollection(); if ($source === '') { return $result; } $tokens = \token_get_all($source); $lastToken = new Token( $tokens[0][2], 'Placeholder', '' ); foreach ($tokens as $pos => $tok) { if (\is_string($tok)) { $token = new Token( $lastToken->getLine(), $this->map[$tok], $tok ); $result->addToken($token); $lastToken = $token; continue; } $line = $tok[2]; $values = \preg_split('/\R+/Uu', $tok[1]); foreach ($values as $v) { $token = new Token( $line, \token_name($tok[0]), $v ); $lastToken = $token; $line++; if ($v === '') { continue; } $result->addToken($token); } } return $this->fillBlanks($result, $lastToken->getLine()); } private function fillBlanks(TokenCollection $tokens, int $maxLine): TokenCollection { $prev = new Token( 0, 'Placeholder', '' ); $final = new TokenCollection(); foreach ($tokens as $token) { if ($prev === null) { $final->addToken($token); $prev = $token; continue; } $gap = $token->getLine() - $prev->getLine(); while ($gap > 1) { $linebreak = new Token( $prev->getLine() + 1, 'T_WHITESPACE', '' ); $final->addToken($linebreak); $prev = $linebreak; $gap--; } $final->addToken($token); $prev = $token; } $gap = $maxLine - $prev->getLine(); while ($gap > 0) { $linebreak = new Token( $prev->getLine() + 1, 'T_WHITESPACE', '' ); $final->addToken($linebreak); $prev = $linebreak; $gap--; } return $final; } } tokenizer/src/TokenCollection.php000064400000004276150247712330013167 0ustar00tokens[] = $token; } public function current(): Token { return \current($this->tokens); } public function key(): int { return \key($this->tokens); } public function next(): void { \next($this->tokens); $this->pos++; } public function valid(): bool { return $this->count() > $this->pos; } public function rewind(): void { \reset($this->tokens); $this->pos = 0; } public function count(): int { return \count($this->tokens); } public function offsetExists($offset): bool { return isset($this->tokens[$offset]); } /** * @throws TokenCollectionException */ public function offsetGet($offset): Token { if (!$this->offsetExists($offset)) { throw new TokenCollectionException( \sprintf('No Token at offest %s', $offset) ); } return $this->tokens[$offset]; } /** * @param Token $value * * @throws TokenCollectionException */ public function offsetSet($offset, $value): void { if (!\is_int($offset)) { $type = \gettype($offset); throw new TokenCollectionException( \sprintf( 'Offset must be of type integer, %s given', $type === 'object' ? \get_class($value) : $type ) ); } if (!$value instanceof Token) { $type = \gettype($value); throw new TokenCollectionException( \sprintf( 'Value must be of type %s, %s given', Token::class, $type === 'object' ? \get_class($value) : $type ) ); } $this->tokens[$offset] = $value; } public function offsetUnset($offset): void { unset($this->tokens[$offset]); } } tokenizer/src/Token.php000064400000001204150247712330011137 0ustar00line = $line; $this->name = $name; $this->value = $value; } public function getLine(): int { return $this->line; } public function getName(): string { return $this->name; } public function getValue(): string { return $this->value; } } tokenizer/src/TokenCollectionException.php000064400000000164150247712330015036 0ustar00xmlns = $xmlns; } public function toDom(TokenCollection $tokens): DOMDocument { $dom = new DOMDocument(); $dom->preserveWhiteSpace = false; $dom->loadXML($this->toXML($tokens)); return $dom; } public function toXML(TokenCollection $tokens): string { $this->writer = new \XMLWriter(); $this->writer->openMemory(); $this->writer->setIndent(true); $this->writer->startDocument(); $this->writer->startElement('source'); $this->writer->writeAttribute('xmlns', $this->xmlns->asString()); if (\count($tokens) > 0) { $this->writer->startElement('line'); $this->writer->writeAttribute('no', '1'); $this->previousToken = $tokens[0]; foreach ($tokens as $token) { $this->addToken($token); } } $this->writer->endElement(); $this->writer->endElement(); $this->writer->endDocument(); return $this->writer->outputMemory(); } private function addToken(Token $token): void { if ($this->previousToken->getLine() < $token->getLine()) { $this->writer->endElement(); $this->writer->startElement('line'); $this->writer->writeAttribute('no', (string)$token->getLine()); $this->previousToken = $token; } if ($token->getValue() !== '') { $this->writer->startElement('token'); $this->writer->writeAttribute('name', $token->getName()); $this->writer->writeRaw(\htmlspecialchars($token->getValue(), \ENT_NOQUOTES | \ENT_DISALLOWED | \ENT_XML1)); $this->writer->endElement(); } } } tokenizer/src/NamespaceUriException.php000064400000000161150247712330014313 0ustar00ensureValidUri($value); $this->value = $value; } public function asString(): string { return $this->value; } private function ensureValidUri($value): void { if (\strpos($value, ':') === false) { throw new NamespaceUriException( \sprintf("Namespace URI '%s' must contain at least one colon", $value) ); } } } tokenizer/src/Exception.php000064400000000146150247712330012021 0ustar00registerCustomFixers([ new \PharIo\CSFixer\PhpdocSingleLineVarFixer() ]) ->setRiskyAllowed(true) ->setRules( [ 'PharIo/phpdoc_single_line_var_fixer' => true, 'align_multiline_comment' => true, 'array_indentation' => true, 'array_syntax' => ['syntax' => 'short'], 'binary_operator_spaces' => [ 'operators' => [ '=' => 'align_single_space_minimal', '=>' => 'align', ], ], 'blank_line_after_namespace' => true, 'blank_line_after_opening_tag' => false, 'blank_line_before_statement' => [ 'statements' => [ 'break', 'continue', 'declare', 'do', 'for', 'foreach', 'if', 'include', 'include_once', 'require', 'require_once', 'return', 'switch', 'throw', 'try', 'while', 'yield', ], ], 'braces' => [ 'allow_single_line_closure' => false, 'position_after_anonymous_constructs' => 'same', 'position_after_control_structures' => 'same', 'position_after_functions_and_oop_constructs' => 'same' ], 'cast_spaces' => ['space' => 'none'], // This fixer removes the blank line at class start, no way to disable that, so we disable the fixer :( //'class_attributes_separation' => ['elements' => ['const', 'method', 'property']], 'combine_consecutive_issets' => true, 'combine_consecutive_unsets' => true, 'compact_nullable_typehint' => true, 'concat_space' => ['spacing' => 'one'], 'date_time_immutable' => true, 'declare_equal_normalize' => ['space' => 'single'], 'declare_strict_types' => true, 'dir_constant' => true, 'elseif' => true, 'encoding' => true, 'full_opening_tag' => true, 'fully_qualified_strict_types' => true, 'function_declaration' => [ 'closure_function_spacing' => 'one' ], 'header_comment' => false, 'indentation_type' => true, 'is_null' => true, 'line_ending' => true, 'list_syntax' => ['syntax' => 'short'], 'logical_operators' => true, 'lowercase_cast' => true, 'lowercase_constants' => true, 'lowercase_keywords' => true, 'lowercase_static_reference' => true, 'magic_constant_casing' => true, 'method_argument_space' => ['ensure_fully_multiline' => true], 'modernize_types_casting' => true, 'multiline_comment_opening_closing' => true, 'multiline_whitespace_before_semicolons' => true, 'native_constant_invocation' => true, 'native_function_casing' => true, 'native_function_invocation' => true, 'new_with_braces' => false, 'no_alias_functions' => true, 'no_alternative_syntax' => true, 'no_blank_lines_after_class_opening' => false, 'no_blank_lines_after_phpdoc' => true, 'no_blank_lines_before_namespace' => true, 'no_closing_tag' => true, 'no_empty_comment' => true, 'no_empty_phpdoc' => true, 'no_empty_statement' => true, 'no_extra_blank_lines' => true, 'no_homoglyph_names' => true, 'no_leading_import_slash' => true, 'no_leading_namespace_whitespace' => true, 'no_mixed_echo_print' => ['use' => 'print'], 'no_multiline_whitespace_around_double_arrow' => true, 'no_null_property_initialization' => true, 'no_php4_constructor' => true, 'no_short_bool_cast' => true, 'no_short_echo_tag' => true, 'no_singleline_whitespace_before_semicolons' => true, 'no_spaces_after_function_name' => true, 'no_spaces_inside_parenthesis' => true, 'no_superfluous_elseif' => true, 'no_superfluous_phpdoc_tags' => true, 'no_trailing_comma_in_list_call' => true, 'no_trailing_comma_in_singleline_array' => true, 'no_trailing_whitespace' => true, 'no_trailing_whitespace_in_comment' => true, 'no_unneeded_control_parentheses' => false, 'no_unneeded_curly_braces' => false, 'no_unneeded_final_method' => true, 'no_unreachable_default_argument_value' => true, 'no_unset_on_property' => true, 'no_unused_imports' => true, 'no_useless_else' => true, 'no_useless_return' => true, 'no_whitespace_before_comma_in_array' => true, 'no_whitespace_in_blank_line' => true, 'non_printable_character' => true, 'normalize_index_brace' => true, 'object_operator_without_whitespace' => true, 'ordered_class_elements' => [ 'order' => [ 'use_trait', 'constant_public', 'constant_protected', 'constant_private', 'property_public_static', 'property_protected_static', 'property_private_static', 'property_public', 'property_protected', 'property_private', 'method_public_static', 'construct', 'destruct', 'magic', 'phpunit', 'method_public', 'method_protected', 'method_private', 'method_protected_static', 'method_private_static', ], ], 'ordered_imports' => true, 'phpdoc_add_missing_param_annotation' => true, 'phpdoc_align' => true, 'phpdoc_annotation_without_dot' => true, 'phpdoc_indent' => true, 'phpdoc_no_access' => true, 'phpdoc_no_empty_return' => true, 'phpdoc_no_package' => true, 'phpdoc_order' => true, 'phpdoc_return_self_reference' => true, 'phpdoc_scalar' => true, 'phpdoc_separation' => true, 'phpdoc_single_line_var_spacing' => true, 'phpdoc_to_comment' => false, 'phpdoc_trim' => true, 'phpdoc_trim_consecutive_blank_line_separation' => true, 'phpdoc_types' => ['groups' => ['simple', 'meta']], 'phpdoc_types_order' => true, 'phpdoc_to_return_type' => true, 'phpdoc_var_without_name' => true, 'pow_to_exponentiation' => true, 'protected_to_private' => true, 'return_assignment' => true, 'return_type_declaration' => ['space_before' => 'none'], 'self_accessor' => false, 'semicolon_after_instruction' => true, 'set_type_to_cast' => true, 'short_scalar_cast' => true, 'simplified_null_return' => true, 'single_blank_line_at_eof' => true, 'single_import_per_statement' => true, 'single_line_after_imports' => true, 'single_quote' => true, 'standardize_not_equals' => true, 'ternary_to_null_coalescing' => true, 'trailing_comma_in_multiline_array' => false, 'trim_array_spaces' => true, 'unary_operator_spaces' => true, 'visibility_required' => [ 'elements' => [ 'const', 'method', 'property', ], ], 'void_return' => true, 'whitespace_after_comma_in_array' => true, 'yoda_style' => false ] ) ->setFinder( PhpCsFixer\Finder::create() ->files() ->in(__DIR__ . '/src') ->in(__DIR__ . '/tests') ->notName('*.phpt') ->notName('autoload.php') ); tokenizer/README.md000064400000003631150247712330010044 0ustar00# Tokenizer A small library for converting tokenized PHP source code into XML. [![Test](https://github.com/theseer/tokenizer/actions/workflows/ci.yml/badge.svg)](https://github.com/theseer/tokenizer/actions/workflows/ci.yml) [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/theseer/tokenizer/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/theseer/tokenizer/?branch=master) [![Code Coverage](https://scrutinizer-ci.com/g/theseer/tokenizer/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/theseer/tokenizer/?branch=master) [![Build Status](https://scrutinizer-ci.com/g/theseer/tokenizer/badges/build.png?b=master)](https://scrutinizer-ci.com/g/theseer/tokenizer/build-status/master) ## Installation You can add this library as a local, per-project dependency to your project using [Composer](https://getcomposer.org/): composer require theseer/tokenizer If you only need this library during development, for instance to run your project's test suite, then you should add it as a development-time dependency: composer require --dev theseer/tokenizer ## Usage examples ```php $tokenizer = new TheSeer\Tokenizer\Tokenizer(); $tokens = $tokenizer->parse(file_get_contents(__DIR__ . '/src/XMLSerializer.php')); $serializer = new TheSeer\Tokenizer\XMLSerializer(); $xml = $serializer->toXML($tokens); echo $xml; ``` The generated XML structure looks something like this: ```xml <?php declare ( strict_types = 1 ) ; ``` tokenizer/LICENSE000064400000002774150247712330007601 0ustar00Tokenizer Copyright (c) 2017 Arne Blankerts and contributors All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Arne Blankerts nor the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. tokenizer/composer.lock000064400000001217150247712330011264 0ustar00{ "_readme": [ "This file locks the dependencies of your project to a known state", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], "content-hash": "b010f1b3d9d47d431ee1cb54ac1de755", "packages": [], "packages-dev": [], "aliases": [], "minimum-stability": "stable", "stability-flags": [], "prefer-stable": false, "prefer-lowest": false, "platform": { "php": "^7.2 || ^8.0", "ext-xmlwriter": "*", "ext-dom": "*", "ext-tokenizer": "*" }, "platform-dev": [] }