One Hat Cyber Team
Your IP:
216.73.216.176
Server IP:
198.54.114.155
Server:
Linux server71.web-hosting.com 4.18.0-513.18.1.lve.el8.x86_64 #1 SMP Thu Feb 22 12:55:50 UTC 2024 x86_64
Server Software:
LiteSpeed
PHP Version:
5.6.40
Create File
|
Create Folder
Execute
Dir :
~
/
proc
/
thread-self
/
root
/
proc
/
self
/
cwd
/
Edit File:
robots-txt.tar
src/RobotsTxt.php 0000644 00000013104 15111416774 0010022 0 ustar 00 <?php namespace Spatie\Robots; class RobotsTxt { protected static array $robotsCache = []; protected array $disallowsPerUserAgent = []; public static function readFrom(string $source): self { $content = @file_get_contents($source); return new self($content !== false ? $content : ''); } public function __construct(string $content) { $this->disallowsPerUserAgent = $this->getDisallowsPerUserAgent($content); } public static function create(string $source): self { if ( strpos($source, 'http') !== false && strpos($source, 'robots.txt') !== false ) { return self::readFrom($source); } return new self($source); } public function allows(string $url, string | null $userAgent = '*'): bool { $requestUri = ''; $parts = parse_url($url); if ($parts !== false) { if (isset($parts['path'])) { $requestUri .= $parts['path']; } if (isset($parts['query'])) { $requestUri .= '?'.$parts['query']; } elseif ($this->hasEmptyQueryString($url)) { $requestUri .= '?'; } } $disallows = $this->disallowsPerUserAgent[strtolower(trim($userAgent))] ?? $this->disallowsPerUserAgent['*'] ?? []; return ! $this->pathIsDenied($requestUri, $disallows); } protected function pathIsDenied(string $requestUri, array $disallows): bool { foreach ($disallows as $disallow) { if ($disallow === '') { continue; } $stopAtEndOfString = false; if ($disallow[-1] === '$') { // if the pattern ends with a dollar sign, the string must end there $disallow = substr($disallow, 0, -1); $stopAtEndOfString = true; } // convert to regexp $disallowRegexp = preg_quote($disallow, '/'); // the pattern must start at the beginning of the string... $disallowRegexp = '^'.$disallowRegexp; // ...and optionally stop at the end of the string if ($stopAtEndOfString) { $disallowRegexp .= '$'; } // replace (preg_quote'd) stars with an eager match $disallowRegexp = str_replace('\\*', '.*', $disallowRegexp); // enclose in delimiters $disallowRegexp = '/'.$disallowRegexp.'/'; if (preg_match($disallowRegexp, $requestUri) === 1) { return true; } } return false; } /** * Checks for an empty query string. * * This works around the fact that parse_url() will not set the 'query' key when the query string is empty. * See: https://bugs.php.net/bug.php?id=78385 */ protected function hasEmptyQueryString(string $url): bool { if ($url === '') { return false; } if ($url[-1] === '?') { // ends with ? return true; } if (strpos($url, '?#') !== false) { // empty query string, followed by a fragment return true; } return false; } protected function getDisallowsPerUserAgent(string $content): array { $lines = explode(PHP_EOL, $content); $lines = array_filter($lines); $disallowsPerUserAgent = []; $currentUserAgents = []; $treatAllowDisallowLine = false; foreach ($lines as $line) { if ($this->isComment($line)) { continue; } if ($this->isEmptyLine($line)) { continue; } if ($this->isUserAgentLine($line)) { if ($treatAllowDisallowLine) { $treatAllowDisallowLine = false; $currentUserAgents = []; } $disallowsPerUserAgent[$this->parseUserAgent($line)] = []; $currentUserAgents[] = &$disallowsPerUserAgent[$this->parseUserAgent($line)]; continue; } if ($this->isDisallowLine($line)) { $treatAllowDisallowLine = true; } if ($this->isAllowLine($line)) { $treatAllowDisallowLine = true; continue; } $disallowUrl = $this->parseDisallow($line); foreach ($currentUserAgents as &$currentUserAgent) { $currentUserAgent[$disallowUrl] = $disallowUrl; } } return $disallowsPerUserAgent; } protected function isComment(string $line): bool { return strpos(trim($line), '#') === 0; } protected function isEmptyLine(string $line): bool { return trim($line) === ''; } protected function isUserAgentLine(string $line): bool { return strpos(trim(strtolower($line)), 'user-agent') === 0; } protected function parseUserAgent(string $line): string { return trim(str_replace('user-agent', '', strtolower(trim($line))), ': '); } protected function parseDisallow(string $line): string { return trim(substr_replace(strtolower(trim($line)), '', 0, 8), ': '); } protected function isDisallowLine(string $line): string { return trim(substr(str_replace(' ', '', strtolower(trim($line))), 0, 8), ': ') === 'disallow'; } protected function isAllowLine(string $line): string { return trim(substr(str_replace(' ', '', strtolower(trim($line))), 0, 6), ': ') === 'allow'; } } src/Robots.php 0000644 00000002765 15111416774 0007335 0 ustar 00 <?php namespace Spatie\Robots; class Robots { protected RobotsTxt | null $robotsTxt; public function __construct( protected string | null $userAgent = null, string | null $source = null, ) { $this->robotsTxt = $source ? RobotsTxt::readFrom($source) : null; } public function withTxt(string $source): self { $this->robotsTxt = RobotsTxt::readFrom($source); return $this; } public static function create(string $userAgent = null, string $source = null): self { return new self($userAgent, $source); } public function mayIndex(string $url, string $userAgent = null): bool { $userAgent = $userAgent ?? $this->userAgent; $robotsTxt = $this->robotsTxt ?? RobotsTxt::create($this->createRobotsUrl($url)); return $robotsTxt->allows($url, $userAgent) && RobotsMeta::readFrom($url)->mayIndex() && RobotsHeaders::readFrom($url)->mayIndex(); } public function mayFollowOn(string $url): bool { return RobotsMeta::readFrom($url)->mayFollow() && RobotsHeaders::readFrom($url)->mayFollow(); } protected function createRobotsUrl(string $url): string { $robotsUrl = parse_url($url, PHP_URL_SCHEME).'://'.parse_url($url, PHP_URL_HOST); if ($port = parse_url($url, PHP_URL_PORT)) { $robotsUrl .= ":{$port}"; } return "{$robotsUrl}/robots.txt"; } } src/RobotsMeta.php 0000644 00000003477 15111416774 0010145 0 ustar 00 <?php namespace Spatie\Robots; use InvalidArgumentException; use JetBrains\PhpStorm\ArrayShape; class RobotsMeta { protected array $robotsMetaTagProperties = []; public static function readFrom(string $source): self { $content = @file_get_contents($source); if ($content === false) { throw new InvalidArgumentException("Could not read from source `{$source}`"); } return new self($content); } public static function create(string $source): self { return new self($source); } public function __construct(string $html) { $this->robotsMetaTagProperties = $this->findRobotsMetaTagProperties($html); } public function mayIndex(): bool { return ! $this->noindex(); } public function mayFollow(): bool { return ! $this->nofollow(); } public function noindex(): bool { return $this->robotsMetaTagProperties['noindex'] ?? false; } public function nofollow(): bool { return $this->robotsMetaTagProperties['nofollow'] ?? false; } #[ArrayShape(['noindex' => "bool", 'nofollow' => "bool"])] protected function findRobotsMetaTagProperties(string $html): array { $metaTagLine = $this->findRobotsMetaTagLine($html); return [ 'noindex' => $metaTagLine ? strpos(strtolower($metaTagLine), 'noindex') !== false : false, 'nofollow' => $metaTagLine ? strpos(strtolower($metaTagLine), 'nofollow') !== false : false, ]; } protected function findRobotsMetaTagLine(string $html): ?string { if (preg_match('/\<meta name=("|\')robots("|\').*?\>/mis', $html, $matches)) { return $matches[0]; } return null; } } src/RobotsHeaders.php 0000644 00000005754 15111416775 0010633 0 ustar 00 <?php namespace Spatie\Robots; use InvalidArgumentException; class RobotsHeaders { protected array $robotHeadersProperties = []; public static function readFrom(string $source): self { $content = @file_get_contents($source); if ($content === false) { throw new InvalidArgumentException("Could not read from source `{$source}`"); } return new self($http_response_header ?? []); } public static function create(array $headers): self { return new self($headers); } public function __construct(array $headers) { $this->robotHeadersProperties = $this->parseHeaders($headers); } public function mayIndex(string $userAgent = '*'): bool { return $this->none($userAgent) ? false : ! $this->noindex($userAgent); } public function mayFollow(string $userAgent = '*'): bool { return $this->none($userAgent) ? false : ! $this->nofollow($userAgent); } public function noindex(string $userAgent = '*'): bool { return $this->robotHeadersProperties[$userAgent]['noindex'] ?? $this->robotHeadersProperties['*']['noindex'] ?? false; } public function nofollow(string $userAgent = '*'): bool { return $this->robotHeadersProperties[$userAgent]['nofollow'] ?? $this->robotHeadersProperties['*']['nofollow'] ?? false; } public function none(string $userAgent = '*'): bool { return $this->robotHeadersProperties[$userAgent]['none'] ?? $this->robotHeadersProperties['*']['none'] ?? false; } protected function parseHeaders(array $headers): array { $robotHeaders = $this->filterRobotHeaders($headers); return array_reduce($robotHeaders, function (array $parsedHeaders, $header) { $header = $this->normalizeHeaders($header); $headerParts = explode(':', $header); $userAgent = count($headerParts) === 3 ? trim($headerParts[1]) : '*'; $options = end($headerParts); $parsedHeaders[$userAgent] = [ 'noindex' => strpos(strtolower($options), 'noindex') !== false, 'nofollow' => strpos(strtolower($options), 'nofollow') !== false, 'none' => strpos(strtolower($options), 'none') !== false, ]; return $parsedHeaders; }, []); } protected function filterRobotHeaders(array $headers): array { return array_filter($headers, function ($header) use ($headers) { $headerContent = $this->normalizeHeaders($headers[$header] ?? []); return strpos(strtolower($header), 'x-robots-tag') === 0 || strpos(strtolower($headerContent), 'x-robots-tag') === 0; }, ARRAY_FILTER_USE_KEY); } protected function normalizeHeaders($headers): string { return implode(',', (array) $headers); } } README.md 0000644 00000006173 15111416775 0006042 0 ustar 00 [<img src="https://github-ads.s3.eu-central-1.amazonaws.com/support-ukraine.svg?t=1" />](https://supportukrainenow.org) # Parse `robots.txt`, `robots` meta and headers [](https://packagist.org/packages/spatie/robots-txt)  [](https://scrutinizer-ci.com/g/spatie/robots-txt) [](https://packagist.org/packages/spatie/robots-txt) Determine if a page may be crawled from robots.txt, robots meta tags and robot headers. ## Support us [<img src="https://github-ads.s3.eu-central-1.amazonaws.com/robots-txt.jpg?t=1" width="419px" />](https://spatie.be/github-ad-click/robots-txt) We invest a lot of resources into creating [best in class open source packages](https://spatie.be/open-source). You can support us by [buying one of our paid products](https://spatie.be/open-source/support-us). We highly appreciate you sending us a postcard from your hometown, mentioning which of our package(s) you are using. You'll find our address on [our contact page](https://spatie.be/about-us). We publish all received postcards on [our virtual postcard wall](https://spatie.be/open-source/postcards). ## Installation You can install the package via composer: ```bash composer require spatie/robots-txt ``` ## Usage ``` php $robots = Spatie\Robots\Robots::create(); $robots->mayIndex('https://www.spatie.be/nl/admin'); $robots->mayFollowOn('https://www.spatie.be/nl/admin'); ``` You can also specify a user agent: ``` php $robots = Spatie\Robots\Robots::create('UserAgent007'); ``` By default, `Robots` will look for a `robots.txt` file on `https://host.com/robots.txt`. Another location can be specified like so: ``` php $robots = Spatie\Robots\Robots::create() ->withTxt('https://www.spatie.be/robots-custom.txt'); $robots = Spatie\Robots\Robots::create() ->withTxt(__DIR__ . '/public/robots.txt'); ``` ### Testing ``` bash composer test ``` ### Changelog Please see [CHANGELOG](CHANGELOG.md) for more information what has changed recently. ## Contributing Please see [CONTRIBUTING](https://github.com/spatie/.github/blob/main/CONTRIBUTING.md) for details. ## Security Vulnerabilities Please review [our security policy](../../security/policy) on how to report security vulnerabilities. ## Postcardware You're free to use this package, but if it makes it to your production environment we highly appreciate you sending us a postcard from your hometown, mentioning which of our package(s) you are using. Our address is: Spatie, Kruikstraat 22, 2018 Antwerp, Belgium. We publish all received postcards [on our company website](https://spatie.be/en/opensource/postcards). ## Credits - [Brent Roose](https://github.com/brendt) - [All Contributors](../../contributors) ## License The MIT License (MIT). Please see [License File](LICENSE.md) for more information. composer.json 0000644 00000001733 15111416775 0007302 0 ustar 00 { "name": "spatie/robots-txt", "description": "Determine if a page may be crawled from robots.txt and robots meta tags", "keywords": [ "spatie", "robots-txt" ], "homepage": "https://github.com/spatie/robots-txt", "license": "MIT", "authors": [ { "name": "Brent Roose", "email": "brent@spatie.be", "homepage": "https://spatie.be", "role": "Developer" } ], "require": { "php": "^8.0" }, "require-dev": { "larapack/dd": "^1.0", "phpunit/phpunit": "^8.0 || ^9.0" }, "autoload": { "psr-4": { "Spatie\\Robots\\": "src" } }, "autoload-dev": { "psr-4": { "Spatie\\Robots\\Tests\\": "tests" } }, "scripts": { "test": "vendor/bin/phpunit", "test-coverage": "phpunit --coverage-html coverage" }, "config": { "sort-packages": true } } LICENSE.md 0000644 00000002102 15111416775 0006153 0 ustar 00 The MIT License (MIT) Copyright (c) Spatie bvba <info@spatie.be> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. CHANGELOG.md 0000644 00000002064 15111416776 0006370 0 ustar 00 # Changelog All notable changes to `robots-txt` will be documented in this file ## 2.0.1 - 2021-05-06 - added x-robots-tag: none (#32) ## 2.0.0 - 2021-03-28 - require PHP 8+ - drop support for PHP 7.x - convert syntax to PHP 8 - remove deprecated methods - use php-cs-fixer & github workflow ## 1.0.10 - 2020-12-08 - handle multiple user-agent (#29) ## 1.0.9 - 2020-11-27 - add support for PHP 8.0 + move to GitHub actions (#27) ## 1.0.8 - 2020-09-12 - make user agent checks case-insensitive ## 1.0.7 - 2020-04-29 - fix find robots meta tag line if minified code (#23) ## 1.0.6 - 2020-04-07 - fix headers checking (nofollow, noindex) for custom userAgent (#21) ## 1.0.5 - 2019-08-08 - improvements around handling of wildcards, end-of-string, query string ## 1.0.4 - 2019-08-07 - improve readability ## 1.0.3 - 2019-03-11 - fix parsing robotstxt urls with keywords (#14) ## 1.0.2 - 2019-01-11 - make robots.txt check case insensitive ## 1.0.1 - 2018-05-07 - prevent exception if the domain has no robots.txt ## 1.0.0 - 2018-05-07 - initial release .php_cs.cache 0000644 00000003273 15111416776 0007101 0 ustar 00 {"php":"8.0.3","version":"2.18.5","indent":" ","lineEnding":"\n","rules":{"blank_line_after_namespace":true,"braces":true,"class_definition":true,"constant_case":true,"elseif":true,"function_declaration":true,"indentation_type":true,"line_ending":true,"lowercase_keywords":true,"method_argument_space":{"on_multiline":"ensure_fully_multiline","keep_multiple_spaces_after_comma":true},"no_break_comment":true,"no_closing_tag":true,"no_spaces_after_function_name":true,"no_spaces_inside_parenthesis":true,"no_trailing_whitespace":true,"no_trailing_whitespace_in_comment":true,"single_blank_line_at_eof":true,"single_class_element_per_statement":{"elements":["property"]},"single_import_per_statement":true,"single_line_after_imports":true,"switch_case_semicolon_to_colon":true,"switch_case_space":true,"visibility_required":true,"encoding":true,"full_opening_tag":true,"array_syntax":{"syntax":"short"},"ordered_imports":{"sortAlgorithm":"alpha"},"no_unused_imports":true,"not_operator_with_successor_space":true,"trailing_comma_in_multiline_array":true,"phpdoc_scalar":true,"unary_operator_spaces":true,"binary_operator_spaces":true,"blank_line_before_statement":{"statements":["break","continue","declare","return","throw","try"]},"phpdoc_single_line_var_spacing":true,"phpdoc_var_without_name":true,"class_attributes_separation":{"elements":["method"]},"single_trait_insert_per_statement":true},"hashes":{"src\/RobotsTxt.php":2756145462,"src\/RobotsMeta.php":3792602502,"src\/RobotsHeaders.php":3731502293,"src\/Robots.php":3883080972,"tests\/RobotsHeadersTest.php":105355989,"tests\/RobotsTxtTest.php":4268517613,"tests\/TestCase.php":855669975,"tests\/RobotsTest.php":651922754,"tests\/RobotsMetaTest.php":335948072}} .php_cs.dist 0000644 00000002377 15111416776 0007005 0 ustar 00 <?php $finder = Symfony\Component\Finder\Finder::create() ->in([ __DIR__ . '/src', __DIR__ . '/tests', ]) ->name('*.php') ->notName('*.blade.php') ->ignoreDotFiles(true) ->ignoreVCS(true); return PhpCsFixer\Config::create() ->setRules([ '@PSR2' => true, 'array_syntax' => ['syntax' => 'short'], 'ordered_imports' => ['sortAlgorithm' => 'alpha'], 'no_unused_imports' => true, 'not_operator_with_successor_space' => true, 'trailing_comma_in_multiline_array' => true, 'phpdoc_scalar' => true, 'unary_operator_spaces' => true, 'binary_operator_spaces' => true, 'blank_line_before_statement' => [ 'statements' => ['break', 'continue', 'declare', 'return', 'throw', 'try'], ], 'phpdoc_single_line_var_spacing' => true, 'phpdoc_var_without_name' => true, 'class_attributes_separation' => [ 'elements' => [ 'method', ], ], 'method_argument_space' => [ 'on_multiline' => 'ensure_fully_multiline', 'keep_multiple_spaces_after_comma' => true, ], 'single_trait_insert_per_statement' => true, ]) ->setFinder($finder); .github/FUNDING.yml 0000644 00000000100 15111416776 0007721 0 ustar 00 github: spatie custom: https://spatie.be/open-source/support-us .github/workflows/run-tests.yml 0000644 00000002400 15111416776 0012635 0 ustar 00 name: Tests on: [push, pull_request] jobs: test: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] php: [8.1, 8.0] dependency-version: [prefer-lowest, prefer-stable] name: P${{ matrix.php }} - ${{ matrix.dependency-version }} - ${{ matrix.os }} steps: - name: Checkout code uses: actions/checkout@v2 - name: Install and start test server run: | cd tests/server npm install (node server.js &) || /bin/true - name: Wait for server bootup run: sleep 5 - name: Setup PHP uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php }} extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, bcmath, soap, intl, gd, exif, iconv, imagick coverage: none - name: Install dependencies run: composer update --no-interaction --prefer-source --no-suggest - name: Execute tests run: vendor/bin/phpunit .github/workflows/update-changelog.yml 0000644 00000001205 15111416777 0014103 0 ustar 00 name: "Update Changelog" on: release: types: [released] jobs: update: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v2 with: ref: main - name: Update Changelog uses: stefanzweifel/changelog-updater-action@v1 with: latest-version: ${{ github.event.release.name }} release-notes: ${{ github.event.release.body }} - name: Commit updated CHANGELOG uses: stefanzweifel/git-auto-commit-action@v4 with: branch: main commit_message: Update CHANGELOG file_pattern: CHANGELOG.md .github/workflows/php-cs-fixer.yml 0000644 00000000766 15111416777 0013214 0 ustar 00 name: Check & fix styling on: [push] jobs: php-cs-fixer: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v2 with: ref: ${{ github.head_ref }} - name: Run PHP CS Fixer uses: docker://oskarstark/php-cs-fixer-ga with: args: --config=.php_cs.dist --allow-risky=yes - name: Commit changes uses: stefanzweifel/git-auto-commit-action@v4 with: commit_message: Fix styling
Simpan