pax_global_header00006660000000000000000000000064136032130020014500gustar00rootroot0000000000000052 comment=5a3887a3ae56ecf8b5f039c94afa84a482eca509 php-league-html-to-markdown-4.9.1/000077500000000000000000000000001360321300200167445ustar00rootroot00000000000000php-league-html-to-markdown-4.9.1/.github/000077500000000000000000000000001360321300200203045ustar00rootroot00000000000000php-league-html-to-markdown-4.9.1/.github/FUNDING.yml000066400000000000000000000000471360321300200221220ustar00rootroot00000000000000github: colinodell patreon: colinodell php-league-html-to-markdown-4.9.1/.github/stale.yml000066400000000000000000000012671360321300200221450ustar00rootroot00000000000000# Number of days of inactivity before an issue becomes stale daysUntilStale: 90 # Number of days of inactivity before a stale issue is closed daysUntilClose: 30 # Issues with these labels will never be considered stale exemptLabels: - pinned - on hold - security # Label to use when marking an issue as stale staleLabel: stale # Comment to post when marking an issue as stale. Set to `false` to disable markComment: > This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions. # Comment to post when closing a stale issue. 
Set to `false` to disable closeComment: false php-league-html-to-markdown-4.9.1/.gitignore000066400000000000000000000000371360321300200207340ustar00rootroot00000000000000~* /build/ vendor composer.lockphp-league-html-to-markdown-4.9.1/.scrutinizer.yml000066400000000000000000000011051360321300200221230ustar00rootroot00000000000000filter: excluded_paths: [tests/*] checks: php: remove_extra_empty_lines: true remove_php_closing_tag: true remove_trailing_whitespace: true fix_use_statements: remove_unused: true preserve_multiple: false preserve_blanklines: true order_alphabetically: true fix_php_opening_tag: true fix_linefeed: true fix_line_ending: true fix_identation_4spaces: true fix_doc_comments: true tools: external_code_coverage: timeout: 1200 runs: 3 php-league-html-to-markdown-4.9.1/.styleci.yml000066400000000000000000000002321360321300200212160ustar00rootroot00000000000000preset: recommended enabled: - concat_with_spaces - strict disabled: - concat_without_spaces - phpdoc_short_description - short_array_syntax php-league-html-to-markdown-4.9.1/.travis.yml000066400000000000000000000010021360321300200210460ustar00rootroot00000000000000language: php dist: bionic php: - "7.1" - "7.2" - "7.3" - "7.4" matrix: include: - php: 5.3 dist: precise - php: 5.4 dist: trusty - php: 5.5 dist: trusty - php: 5.6 dist: xenial - php: 7.0 dist: xenial install: travis_retry composer install --no-interaction --prefer-source script: - vendor/bin/phpunit --coverage-text --coverage-clover=coverage.clover after_script: - vendor/bin/ocular code-coverage:upload --format=php-clover coverage.clover php-league-html-to-markdown-4.9.1/CHANGELOG.md000066400000000000000000000242361360321300200205640ustar00rootroot00000000000000# Change Log All notable changes to this project will be documented in this file. Updates should follow the [Keep a CHANGELOG](http://keepachangelog.com/) principles. 
## [Unreleased][unreleased] ## [4.9.1] - 2019-12-27 ### Fixed - Fixed issue with HTML entity escaping in text (#184) ## [4.9.0] - 2019-11-02 ### Added - Added new option to preserve comments (#177, #179) ## [4.8.3] - 2019-10-31 ### Fixed - Fixed whitespace preservation around `` tags (#174, #178) ## [4.8.2] - 2019-08-02 ### Fixed - Fixed headers not being placed onto a new line in some cases (#172) - Fixed handling of links containing spaces (#175) ### Removed - Removed support for HHVM ## [4.8.1] - 2018-12-24 ### Added - Added support for PHP 7.3 ### Fixed - Fixed paragraphs following tables (#165, #166) - Fixed incorrect list item escaping (#168, #169) ## [4.8.0] - 2018-09-18 ### Added - Added support for email auto-linking - Added a new interface (`HtmlConverterInterface`) for the main `HtmlConverter` class - Added additional test cases (#14) ### Changed - The `italic_style` option now defaults to `'*'` so that in-word emphasis is handled properly (#75) ### Fixed - Fixed several issues of `` and `
` tags not converting to blocks or inlines properly (#26, #70, #102, #140, #161, #162)
 - Fixed in-word emphasis using underscores as delimiter (#75)
 - Fixed character escaping inside of `
` elements - Fixed header edge cases ### Deprecated - The `bold_style` and `italic_style` options have been deprecated (#75) ## [4.7.0] - 2018-05-19 ### Added - Added `setOptions()` function for chainable calling (#149) - Added new `list_item_style_alternate` option for converting every-other list with a different character (#155) ### Fixed - Fixed insufficient newlines after code blocks (#144, #148) - Fixed trailing spaces not being preserved in link anchors (#157) - Fixed list-like lines not being escaped inside of lists items (#159) ## [4.6.2] ### Fixed - Fixed issue with emphasized spaces (#146) ## [4.6.1] ### Fixed - Fixed conversion of `
` tags (#145)

## [4.6.0]
### Added
 - Added support for ordered lists starting at numbers other than 1

### Fixed
 - Fixed overly-eager escaping of list-like text (#141)

## [4.5.0]
### Added
 - Added configuration option for list item style (#135, #136)

## [4.4.1]

### Fixed
 - Fixed autolinking of invalid URLs (#129)

## [4.4.0]

### Added
 - Added `hard_break` configuration option (#112, #115)
 - The `HtmlConverter` can now be instantiated with an `Environment` (#118)

### Fixed
 - Fixed handling of paragraphs in list item elements (#47, #110)
 - Fixed phantom spaces when newlines follow `br` elements (#116, #117)
 - Fixed link converter not sanitizing inner spaces properly (#119, #120)

## [4.3.1]
### Changed
 - Revised the sanitization implementation (#109)

### Fixed
 - Fixed tag-like content not being escaped (#67, #109)
 - Fixed thematic break-like content not being escaped (#65, #109)
 - Fixed codefence-like content not being escaped (#64, #109)

## [4.3.0]
### Added
 - Added full support for PHP 7.0 and 7.1

### Changed
 - Changed `
` and `
` conversions to use backticks instead of indentation (#102)

### Fixed
 - Fixed issue where specified code language was not preserved (#70, #102)
 - Fixed issue where `` tags nested in `
` were not converted properly (#70, #102)
 - Fixed header-like content not being escaped (#76, #105)
 - Fixed blockquote-like content not being escaped (#77, #103)
 - Fixed ordered list-like content not being escaped (#73, #106)
 - Fixed unordered list-like content not being escaped (#71, #107)

## [4.2.2]
### Fixed
 - Fixed sanitization bug which sometimes removes desired content (#63, #101)

## [4.2.1]
### Fixed
 - Fixed path to autoload.php when used as a library (#98)
 - Fixed edge case for tags containing only whitespace (#99)

### Removed
 - Removed double HTML entity decoding, as this is not desirable (#60)

## [4.2.0]

### Added
 - Added the ability to invoke HtmlConverter objects as functions (#85)

### Fixed
 - Fixed improper handling of nested list items (#19 and #84)
 - Fixed preceding or trailing spaces within emphasis tags (#83)

## [4.1.1]

### Fixed
 - Fixed conversion of empty paragraphs (#78)
 - Fixed `preg_replace` so it wouldn't break UTF-8 characters (#79)

## [4.1.0]

### Added
 - Added `bin/html-to-markdown` script

### Changed
 - Changed default italic character to `_` (#58)

## [4.0.1]

### Fixed
 - Added escaping to avoid * and _ in a text being rendered as emphasis (#48)

### Removed
 - Removed the demo (#51)
 - `.styleci.yml` and `CONTRIBUTING.md` are no longer included in distributions (#50)

## [4.0.0]

This release changes the visibility of several methods/properties. #42 and #43 brought to light that some visibilities were
not ideally set, so this release fixes that. Moving forward, this should reduce the chance of introducing BC-breaking changes.

### Added
 - Added new `HtmlConverter::getEnvironment()` method to expose the `Environment` (#42, #43)

### Changed
 - Changed `Environment::addConverter()` from `protected` to `public`, enabling custom converters to be added (#42, #43)
 - Changed `HtmlConverter::createDOMDocument()` from `protected` to `private`
 - Changed `Element::nextCached` from `protected` to `private`
 - Made the `Environment` class `final`

## [3.1.1]
### Fixed
 - Empty HTML strings now result in empty Markdown documents (#40, #41)

## [3.1.0]
### Added
 - Added new `equals` method to `Element` to check for equality

### Changed
 - Use Linux line endings consistently instead of platform-specific line endings (#36)

### Fixed
 - Cleaned up code style

## [3.0.0]
### Changed
 - Changed namespace to `League\HTMLToMarkdown`
 - Changed packagist name to `league/html-to-markdown`
 - Re-organized code into several separate classes
 - `<a>` tags with identical href and inner text are now rendered using angle bracket syntax (#31)
 - `
` elements are now treated as block-level elements (#33) ## [2.2.2] ### Added - Added support for PHP 5.6 and HHVM - Enabled testing against PHP 7 nightlies - Added this CHANGELOG.md ### Fixed - Fixed whitespace preservation between inline elements (#9 and #10) ## [2.2.1] ### Fixed - Preserve placeholder links (#22) ## [2.2.0] ### Added - Added CircleCI config ### Changed - `
` blocks are now treated as code elements

### Removed
 - Dropped support for PHP 5.2
 - Removed incorrect README comment regarding `#text` nodes (#17)

## [2.1.2]
### Added
 - Added the ability to blacklist/remove specific node types (#11)

### Changed
 - Line breaks are now placed after divs instead of before them
 - Newlines inside of link texts are now removed
 - Updated the minimum PHPUnit version to 4.*

## [2.1.1]
### Added
 - Added options to customize emphasis characters

## [2.1.0]
### Added
 - Added option to strip HTML tags without Markdown equivalents
 - Added `convert()` method for converter reuse
 - Added ability to set options after instance construction
 - Documented the required PHP extensions (#4)

### Changed
 - ATX style now used for h1 and h2 tags inside blockquotes

### Fixed
 - Newlines inside blockquotes are now started with a bracket
 - Fixed some incorrect docblocks
 - `__toString()` now returns an empty string if input is empty
 - Convert head tag if body tag is empty (#7)
 - Preserve special characters inside tags without md equivalents (#6)


## [2.0.1]
### Fixed
 - Fixed first line indentation for multi-line code blocks
 - Fixed consecutive anchors getting their separating spaces stripped (#3)

## [2.0.0]
### Added
 - Initial release

[unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.9.1...master
[4.9.1]: https://github.com/thephpleague/html-to-markdown/compare/4.9.0...4.9.1
[4.9.0]: https://github.com/thephpleague/html-to-markdown/compare/4.8.3...4.9.0
[4.8.3]: https://github.com/thephpleague/html-to-markdown/compare/4.8.2...4.8.3
[4.8.2]: https://github.com/thephpleague/html-to-markdown/compare/4.8.1...4.8.2
[4.8.1]: https://github.com/thephpleague/html-to-markdown/compare/4.8.0...4.8.1
[4.8.0]: https://github.com/thephpleague/html-to-markdown/compare/4.7.0...4.8.0
[4.7.0]: https://github.com/thephpleague/html-to-markdown/compare/4.6.2...4.7.0
[4.6.2]: https://github.com/thephpleague/html-to-markdown/compare/4.6.1...4.6.2
[4.6.1]: https://github.com/thephpleague/html-to-markdown/compare/4.6.0...4.6.1
[4.6.0]: https://github.com/thephpleague/html-to-markdown/compare/4.5.0...4.6.0
[4.5.0]: https://github.com/thephpleague/html-to-markdown/compare/4.4.1...4.5.0
[4.4.1]: https://github.com/thephpleague/html-to-markdown/compare/4.4.0...4.4.1
[4.4.0]: https://github.com/thephpleague/html-to-markdown/compare/4.3.1...4.4.0
[4.3.1]: https://github.com/thephpleague/html-to-markdown/compare/4.3.0...4.3.1
[4.3.0]: https://github.com/thephpleague/html-to-markdown/compare/4.2.2...4.3.0
[4.2.2]: https://github.com/thephpleague/html-to-markdown/compare/4.2.1...4.2.2
[4.2.1]: https://github.com/thephpleague/html-to-markdown/compare/4.2.0...4.2.1
[4.2.0]: https://github.com/thephpleague/html-to-markdown/compare/4.1.1...4.2.0
[4.1.1]: https://github.com/thephpleague/html-to-markdown/compare/4.1.0...4.1.1
[4.1.0]: https://github.com/thephpleague/html-to-markdown/compare/4.0.1...4.1.0
[4.0.1]: https://github.com/thephpleague/html-to-markdown/compare/4.0.0...4.0.1
[4.0.0]: https://github.com/thephpleague/html-to-markdown/compare/3.1.1...4.0.0
[3.1.1]: https://github.com/thephpleague/html-to-markdown/compare/3.1.0...3.1.1
[3.1.0]: https://github.com/thephpleague/html-to-markdown/compare/3.0.0...3.1.0
[3.0.0]: https://github.com/thephpleague/html-to-markdown/compare/2.2.2...3.0.0
[2.2.2]: https://github.com/thephpleague/html-to-markdown/compare/2.2.1...2.2.2
[2.2.1]: https://github.com/thephpleague/html-to-markdown/compare/2.2.0...2.2.1
[2.2.0]: https://github.com/thephpleague/html-to-markdown/compare/2.1.2...2.2.0
[2.1.2]: https://github.com/thephpleague/html-to-markdown/compare/2.1.1...2.1.2
[2.1.1]: https://github.com/thephpleague/html-to-markdown/compare/2.1.0...2.1.1
[2.1.0]: https://github.com/thephpleague/html-to-markdown/compare/2.0.1...2.1.0
[2.0.1]: https://github.com/thephpleague/html-to-markdown/compare/2.0.0...2.0.1
[2.0.0]: https://github.com/thephpleague/html-to-markdown/compare/775f91e...2.0.0

php-league-html-to-markdown-4.9.1/CONDUCT.md000066400000000000000000000036751360321300200204000ustar00rootroot00000000000000# Contributor Code of Conduct

As contributors and maintainers of this project, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.

We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, or nationality.

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery
* Personal attacks
* Trolling or insulting/derogatory comments
* Public or private harassment
* Publishing others' private information, such as physical or electronic addresses, without explicit permission
* Other unethical or unprofessional conduct.

Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. By adopting this Code of Conduct, project maintainers commit themselves to fairly and consistently applying these principles to every aspect of managing this project. Project maintainers who do not follow or enforce the Code of Conduct may be permanently removed from the project team.

This code of conduct applies both within project spaces and in public spaces when an individual is representing the project or its community.

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.

This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.2.0, available at [http://contributor-covenant.org/version/1/2/0/](http://contributor-covenant.org/version/1/2/0/)
php-league-html-to-markdown-4.9.1/CONTRIBUTING.md000066400000000000000000000025071360321300200212010ustar00rootroot00000000000000# Contributing

Contributions are **welcome** and will be fully **credited**.

We accept contributions via Pull Requests on [GitHub](https://github.com/thephpleague/html-to-markdown).


## Pull Requests

- **[PSR-2 Coding Standard](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-2-coding-style-guide.md)** - The easiest way to apply the conventions is to install [PHP Code Sniffer](http://pear.php.net/package/PHP_CodeSniffer).

- **Add tests!** - Your patch won't be accepted if it doesn't have tests.

- **Document any change in behaviour** - Make sure the `README.md` and any other relevant documentation are kept up-to-date.

- **Consider our release cycle** - We try to follow [SemVer v2.0.0](http://semver.org/). Randomly breaking public APIs is not an option.

- **Create feature branches** - Don't ask us to pull from your master branch.

- **One pull request per feature** - If you want to do more than one thing, send multiple pull requests.

- **Send coherent history** - Make sure each individual commit in your pull request is meaningful. If you had to make multiple intermediate commits while developing, please [squash them](http://www.git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Changing-Multiple-Commit-Messages) before submitting.


## Running Tests

``` bash
$ ./vendor/bin/phpunit
```


**Happy coding**!
php-league-html-to-markdown-4.9.1/LICENSE000066400000000000000000000021311360321300200177460ustar00rootroot00000000000000The MIT License (MIT)

Copyright (c) 2015 Colin O'Dell

Originally created by Nick Cernis

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.php-league-html-to-markdown-4.9.1/README.md000066400000000000000000000214441360321300200202300ustar00rootroot00000000000000HTML To Markdown for PHP
========================

[![Join the chat at https://gitter.im/thephpleague/html-to-markdown](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/thephpleague/html-to-markdown?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

[![Latest Version](https://img.shields.io/packagist/v/league/html-to-markdown.svg?style=flat-square)](https://packagist.org/packages/league/html-to-markdown)
[![Software License](http://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square)](LICENSE)
[![Build Status](https://img.shields.io/travis/thephpleague/html-to-markdown/master.svg?style=flat-square)](https://travis-ci.org/thephpleague/html-to-markdown)
[![Coverage Status](https://img.shields.io/scrutinizer/coverage/g/thephpleague/html-to-markdown.svg?style=flat-square)](https://scrutinizer-ci.com/g/thephpleague/html-to-markdown/code-structure)
[![Quality Score](https://img.shields.io/scrutinizer/g/thephpleague/html-to-markdown.svg?style=flat-square)](https://scrutinizer-ci.com/g/thephpleague/html-to-markdown)
[![Total Downloads](https://img.shields.io/packagist/dt/league/html-to-markdown.svg?style=flat-square)](https://packagist.org/packages/league/html-to-markdown)

Library which converts HTML to [Markdown](http://daringfireball.net/projects/markdown/) for your sanity and convenience.


**Requires**: PHP 5.3+ or PHP 7.0+

**Lead Developer**: [@colinodell](http://twitter.com/colinodell)

**Original Author**: [@nickcernis](http://twitter.com/nickcernis)


### Why convert HTML to Markdown?

*"What alchemy is this?"* you mutter. *"I can see why you'd convert [Markdown to HTML](https://github.com/thephpleague/commonmark),"* you continue, already labouring the question somewhat, *"but why go the other way?"*

Typically you would convert HTML to Markdown if:

1. You have an existing HTML document that needs to be edited by people with good taste.
2. You want to store new content in HTML format but edit it as Markdown.
3. You want to convert HTML email to plain text email.
4. You know a guy who's been converting HTML to Markdown for years, and now he can speak Elvish. You'd quite like to be able to speak Elvish.
5. You just really like Markdown.

### How to use it

Require the library by issuing this command:

```bash
composer require league/html-to-markdown
```

Add `require 'vendor/autoload.php';` to the top of your script.

Next, create a new HtmlConverter instance, passing in your valid HTML code to its `convert()` function:

```php
use League\HTMLToMarkdown\HtmlConverter;

$converter = new HtmlConverter();

$html = "

Quick, to the Batpoles!

"; $markdown = $converter->convert($html); ``` The `$markdown` variable now contains the Markdown version of your HTML as a string: ```php echo $markdown; // ==> ### Quick, to the Batpoles! ``` The included `demo` directory contains an HTML->Markdown conversion form to try out. ### Conversion options By default, HTML To Markdown preserves HTML tags without Markdown equivalents, like `` and `
`. To strip HTML tags that don't have a Markdown equivalent while preserving the content inside them, set `strip_tags` to true, like this: ```php $converter = new HtmlConverter(array('strip_tags' => true)); $html = 'Turnips!'; $markdown = $converter->convert($html); // $markdown now contains "Turnips!" ``` Or more explicitly, like this: ```php $converter = new HtmlConverter(); $converter->getConfig()->setOption('strip_tags', true); $html = 'Turnips!'; $markdown = $converter->convert($html); // $markdown now contains "Turnips!" ``` Note that only the tags themselves are stripped, not the content they hold. To strip tags and their content, pass a space-separated list of tags in `remove_nodes`, like this: ```php $converter = new HtmlConverter(array('remove_nodes' => 'span div')); $html = 'Turnips!
Monkeys!
'; $markdown = $converter->convert($html); // $markdown now contains "" ``` By default, all comments are stripped from the content. To preserve them, use the `preserve_comments` option, like this: ```php $converter = new HtmlConverter(array('preserve_comments' => true)); $html = 'Turnips!'; $markdown = $converter->convert($html); // $markdown now contains "Turnips!" ``` To preserve only specific comments, set `preserve_comments` with an array of strings, like this: ```php $converter = new HtmlConverter(array('preserve_comments' => array('Eggs!'))); $html = 'Turnips!'; $markdown = $converter->convert($html); // $markdown now contains "Turnips!" ``` ### Style options By default bold tags are converted using the asterisk syntax, and italic tags are converted using the underlined syntax. Change these by using the `bold_style` and `italic_style` options. ```php $converter = new HtmlConverter(); $converter->getConfig()->setOption('italic_style', '*'); $converter->getConfig()->setOption('bold_style', '__'); $html = 'Italic and a bold'; $markdown = $converter->convert($html); // $markdown now contains "*Italic* and a __bold__" ``` ### Line break options By default, `br` tags are converted to two spaces followed by a newline character as per [traditional Markdown](https://daringfireball.net/projects/markdown/syntax#p). Set `hard_break` to `true` to omit the two spaces, as per GitHub Flavored Markdown (GFM). ```php $converter = new HtmlConverter(); $html = '

test
line break

'; $converter->getConfig()->setOption('hard_break', true); $markdown = $converter->convert($html); // $markdown now contains "test\nline break" $converter->getConfig()->setOption('hard_break', false); // default $markdown = $converter->convert($html); // $markdown now contains "test \nline break" ``` ### Passing custom Environment object You can pass current `Environment` object to customize i.e. which converters should be used. ```php $environment = new Environment(array( // your configuration here )); $environment->addConverter(new HeaderConverter()); // optionally - add converter manually $converter = new HtmlConverter($environment); $html = '

Header

'; $markdown = $converter->convert($html); // $markdown now contains "### Header" and "" ``` ### Limitations - Markdown Extra, MultiMarkdown and other variants aren't supported – just Markdown. ### Known issues - Nested lists and lists containing multiple paragraphs aren't converted correctly. - Lists inside blockquotes aren't converted correctly. - Any reported [open issues here](https://github.com/thephpleague/html-to-markdown/issues?state=open). [Report your issue or request a feature here.](https://github.com/thephpleague/html-to-markdown/issues/new) Issues with patches or failing tests are especially welcome. ### Style notes - Setext (underlined) headers are the default for H1 and H2. If you prefer the ATX style for H1 and H2 (# Header 1 and ## Header 2), set `header_style` to 'atx' in the options array when you instantiate the object: `$converter = new HtmlConverter(array('header_style'=>'atx'));` Headers of H3 priority and lower always use atx style. - Links and images are referenced inline. Footnote references (where image src and anchor href attributes are listed in the footnotes) are not used. - Blockquotes aren't line wrapped – it makes the converted Markdown easier to edit. ### Dependencies HTML To Markdown requires PHP's [xml](http://www.php.net/manual/en/xml.installation.php), [lib-xml](http://www.php.net/manual/en/libxml.installation.php), and [dom](http://www.php.net/manual/en/dom.installation.php) extensions, all of which are enabled by default on most distributions. Errors such as "Fatal error: Class 'DOMDocument' not found" on distributions such as CentOS that disable PHP's xml extension can be resolved by installing php-xml. ### Contributors Many thanks to all [contributors](https://github.com/thephpleague/html-to-markdown/graphs/contributors) so far. Further improvements and feature suggestions are very welcome. 
### How it works HTML To Markdown creates a DOMDocument from the supplied HTML, walks through the tree, and converts each node to a text node containing the equivalent markdown, starting from the most deeply nested node and working inwards towards the root node. ### To-do - Support for nested lists and lists inside blockquotes. - Offer an option to preserve tags as HTML if they contain attributes that can't be represented with Markdown (e.g. `style`). ### Trying to convert Markdown to HTML? Use one of these great libraries: - [league/commonmark](https://github.com/thephpleague/commonmark) (recommended) - [cebe/markdown](https://github.com/cebe/markdown) - [PHP Markdown](https://michelf.ca/projects/php-markdown/) - [Parsedown](https://github.com/erusev/parsedown) No guarantees about the Elvish, though. php-league-html-to-markdown-4.9.1/bin/000077500000000000000000000000001360321300200175145ustar00rootroot00000000000000php-league-html-to-markdown-4.9.1/bin/html-to-markdown000077500000000000000000000040401360321300200226440ustar00rootroot00000000000000#!/usr/bin/env php $arg) { if ($i === 0) { continue; } if (substr($arg, 0, 1) === '-') { switch ($arg) { case '-h': case '--help': echo getHelpText(); exit(0); default: fail('Unknown option: ' . $arg); } } else { $src = $argv[1]; } } if (isset($src)) { if (!file_exists($src)) { fail('File not found: ' . $src); } $html = file_get_contents($src); } else { $stdin = fopen('php://stdin', 'r'); stream_set_blocking($stdin, false); $html = stream_get_contents($stdin); fclose($stdin); if (empty($html)) { fail(getHelpText()); } } $converter = new League\HTMLToMarkdown\HtmlConverter(); echo $converter->convert($html); /** * Get help and usage info * * @return string */ function getHelpText() { return << output.md Converting from STDIN: echo -e '

Hello World!

' | html-to-markdown Converting from STDIN and saving the output: echo -e '

Hello World!

' | html-to-markdown > output.md HELP; } /** * @param string $message Error message */ function fail($message) { fwrite(STDERR, $message . "\n"); exit(1); } function requireAutoloader() { $autoloadPaths = array( // Local package usage __DIR__ . '/../vendor/autoload.php', // Package was included as a library __DIR__ . '/../../../autoload.php', ); foreach ($autoloadPaths as $path) { if (file_exists($path)) { require_once $path; break; } } } php-league-html-to-markdown-4.9.1/circle.yml000066400000000000000000000000411360321300200207230ustar00rootroot00000000000000test: override: - phpunit php-league-html-to-markdown-4.9.1/composer.json000066400000000000000000000023451360321300200214720ustar00rootroot00000000000000{ "name": "league/html-to-markdown", "type": "library", "description": "An HTML-to-markdown conversion helper for PHP", "keywords": ["markdown", "html"], "homepage": "https://github.com/thephpleague/html-to-markdown", "license": "MIT", "authors": [ { "name": "Colin O'Dell", "email": "colinodell@gmail.com", "homepage": "https://www.colinodell.com", "role": "Lead Developer" }, { "name": "Nick Cernis", "email": "nick@cern.is", "homepage": "http://modernnerd.net", "role": "Original Author" } ], "autoload": { "psr-4": { "League\\HTMLToMarkdown\\": "src/" } }, "autoload-dev": { "psr-4": { "League\\HTMLToMarkdown\\Test\\": "tests" } }, "require": { "php": ">=5.3.3", "ext-dom": "*", "ext-xml": "*" }, "require-dev": { "mikehaertl/php-shellcommand": "~1.1.0", "phpunit/phpunit": "^4.8|^5.7", "scrutinizer/ocular": "~1.1" }, "bin": ["bin/html-to-markdown"], "extra": { "branch-alias": { "dev-master": "4.10-dev" } } } php-league-html-to-markdown-4.9.1/phpunit.xml.dist000066400000000000000000000020401360321300200221130ustar00rootroot00000000000000 tests src/ 
php-league-html-to-markdown-4.9.1/src/000077500000000000000000000000001360321300200175335ustar00rootroot00000000000000php-league-html-to-markdown-4.9.1/src/Configuration.php000066400000000000000000000035441360321300200230610ustar00rootroot00000000000000config = $config; $this->checkForDeprecatedOptions($config); } /** * @param array $config */ public function merge(array $config = array()) { $this->checkForDeprecatedOptions($config); $this->config = array_replace_recursive($this->config, $config); } /** * @param array $config */ public function replace(array $config = array()) { $this->checkForDeprecatedOptions($config); $this->config = $config; } /** * @param string $key * @param mixed $value */ public function setOption($key, $value) { $this->checkForDeprecatedOptions(array($key => $value)); $this->config[$key] = $value; } /** * @param string|null $key * @param mixed|null $default * * @return mixed|null */ public function getOption($key = null, $default = null) { if ($key === null) { return $this->config; } if (!isset($this->config[$key])) { return $default; } return $this->config[$key]; } private function checkForDeprecatedOptions(array $config) { foreach ($config as $key => $value) { if ($key === 'bold_style' && $value !== '**') { @trigger_error('Customizing the bold_style option is deprecated and may be removed in the next major version', E_USER_DEPRECATED); } elseif ($key === 'italic_style' && $value !== '*') { @trigger_error('Customizing the italic_style option is deprecated and may be removed in the next major version', E_USER_DEPRECATED); } } } } php-league-html-to-markdown-4.9.1/src/ConfigurationAwareInterface.php000066400000000000000000000002751360321300200256600ustar00rootroot00000000000000' symbols to each line. $markdown = ''; $quote_content = trim($element->getValue()); $lines = preg_split('/\r\n|\r|\n/', $quote_content); $total_lines = count($lines); foreach ($lines as $i => $line) { $markdown .= '> ' . $line . 
"\n"; if ($i + 1 === $total_lines) { $markdown .= "\n"; } } return $markdown; } /** * @return string[] */ public function getSupportedTags() { return array('blockquote'); } } php-league-html-to-markdown-4.9.1/src/Converter/CodeConverter.php000066400000000000000000000044441360321300200247630ustar00rootroot00000000000000getAttribute('class'); if ($classes) { // Since tags can have more than one class, we need to find the one that starts with 'language-' $classes = explode(' ', $classes); foreach ($classes as $class) { if (strpos($class, 'language-') !== false) { // Found one, save it as the selected language and stop looping over the classes. $language = str_replace('language-', '', $class); break; } } } $markdown = ''; $code = html_entity_decode($element->getChildrenAsString()); // In order to remove the code tags we need to search for them and, in the case of the opening tag // use a regular expression to find the tag and the other attributes it might have $code = preg_replace('/]*>/', '', $code); $code = str_replace('
', '', $code); // Checking if it's a code block or span if ($this->shouldBeBlock($element, $code)) { // Code block detected, newlines will be added in parent $markdown .= '```' . $language . "\n" . $code . "\n" . '```'; } else { // One line of code, wrapping it on one backtick, removing new lines $markdown .= '`' . preg_replace('/\r\n|\r|\n/', '', $code) . '`'; } return $markdown; } /** * @return string[] */ public function getSupportedTags() { return array('code'); } /** * @param ElementInterface $element * @param string $code * * @return bool */ private function shouldBeBlock(ElementInterface $element, $code) { if ($element->getParent()->getTagName() == 'pre') { return true; } if (preg_match('/[^\s]` `/', $code)) { return true; } return false; } } php-league-html-to-markdown-4.9.1/src/Converter/CommentConverter.php000066400000000000000000000025261360321300200255120ustar00rootroot00000000000000config = $config; } /** * @param ElementInterface $element * * @return string */ public function convert(ElementInterface $element) { if ($this->shouldPreserve($element)) { return ''; } return ''; } /** * @return string[] */ public function getSupportedTags() { return array('#comment'); } /** * @param ElementInterface $element * * @return bool */ private function shouldPreserve(ElementInterface $element) { $preserve = $this->config->getOption('preserve_comments'); if ($preserve === true) { return true; } if (is_array($preserve)) { $value = trim($element->getValue()); return in_array($value, $preserve); } return false; } } php-league-html-to-markdown-4.9.1/src/Converter/ConverterInterface.php000066400000000000000000000005371360321300200260100ustar00rootroot00000000000000config = $config; } /** * @param ElementInterface $element * * @return string */ public function convert(ElementInterface $element) { // If strip_tags is false (the default), preserve tags that don't have Markdown equivalents, // such as nodes on their own. C14N() canonicalizes the node to a string. 
// See: http://www.php.net/manual/en/domnode.c14n.php if ($this->config->getOption('strip_tags', false)) { return $element->getValue(); } $markdown = html_entity_decode($element->getChildrenAsString()); if ($element->getTagName() === 'table') { $markdown .= "\n\n"; } return $markdown; } /** * @return string[] */ public function getSupportedTags() { return array(self::DEFAULT_CONVERTER); } } php-league-html-to-markdown-4.9.1/src/Converter/DivConverter.php000066400000000000000000000016721360321300200246330ustar00rootroot00000000000000config = $config; } /** * @param ElementInterface $element * * @return string */ public function convert(ElementInterface $element) { if ($this->config->getOption('strip_tags', false)) { return $element->getValue() . "\n\n"; } return html_entity_decode($element->getChildrenAsString()); } /** * @return string[] */ public function getSupportedTags() { return array('div'); } } php-league-html-to-markdown-4.9.1/src/Converter/EmphasisConverter.php000066400000000000000000000024351360321300200256600ustar00rootroot00000000000000config = $config; } /** * @param ElementInterface $element * * @return string */ public function convert(ElementInterface $element) { $tag = $element->getTagName(); $value = $element->getValue(); if (!trim($value)) { return $value; } if ($tag === 'i' || $tag === 'em') { $style = $this->config->getOption('italic_style'); } else { $style = $this->config->getOption('bold_style'); } $prefix = ltrim($value) !== $value ? ' ' : ''; $suffix = rtrim($value) !== $value ? ' ' : ''; return $prefix . $style . trim($value) . $style . 
$suffix; } /** * @return string[] */ public function getSupportedTags() { return array('em', 'i', 'strong', 'b'); } } php-league-html-to-markdown-4.9.1/src/Converter/HardBreakConverter.php000066400000000000000000000024411360321300200257270ustar00rootroot00000000000000config = $config; } /** * @param ElementInterface $element * * @return string */ public function convert(ElementInterface $element) { $return = $this->config->getOption('hard_break') ? "\n" : " \n"; $next = $element->getNext(); if ($next) { $next_value = $next->getValue(); if ($next_value) { if (in_array(substr($next_value, 0, 2), array('- ', '* ', '+ '))) { $parent = $element->getParent(); if ($parent && $parent->getTagName() == 'li') { $return .= '\\'; } } } } return $return; } /** * @return string[] */ public function getSupportedTags() { return array('br'); } } php-league-html-to-markdown-4.9.1/src/Converter/HeaderConverter.php000066400000000000000000000037541360321300200253040ustar00rootroot00000000000000config = $config; } /** * @param ElementInterface $element * * @return string */ public function convert(ElementInterface $element) { $level = (int) substr($element->getTagName(), 1, 1); $style = $this->config->getOption('header_style', self::STYLE_SETEXT); if (strlen($element->getValue()) === 0) { return "\n"; } if (($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) { return $this->createSetextHeader($level, $element->getValue()); } return $this->createAtxHeader($level, $element->getValue()); } /** * @return string[] */ public function getSupportedTags() { return array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); } /** * @param int $level * @param string $content * * @return string */ private function createSetextHeader($level, $content) { $length = function_exists('mb_strlen') ? mb_strlen($content, 'utf-8') : strlen($content); $underline = ($level === 1) ? '=' : '-'; return $content . "\n" . str_repeat($underline, $length) . 
"\n\n"; } /** * @param int $level * @param string $content * * @return string */ private function createAtxHeader($level, $content) { $prefix = str_repeat('#', $level) . ' '; return $prefix . $content . "\n\n"; } } php-league-html-to-markdown-4.9.1/src/Converter/HorizontalRuleConverter.php000066400000000000000000000007221360321300200270650ustar00rootroot00000000000000getAttribute('src'); $alt = $element->getAttribute('alt'); $title = $element->getAttribute('title'); if ($title !== '') { // No newlines added. should be in a block-level element. return '![' . $alt . '](' . $src . ' "' . $title . '")'; } return '![' . $alt . '](' . $src . ')'; } /** * @return string[] */ public function getSupportedTags() { return array('img'); } } php-league-html-to-markdown-4.9.1/src/Converter/LinkConverter.php000066400000000000000000000032641360321300200250050ustar00rootroot00000000000000getAttribute('href'); $title = $element->getAttribute('title'); $text = trim($element->getValue(), "\t\n\r\0\x0B"); if ($title !== '') { $markdown = '[' . $text . '](' . $href . ' "' . $title . '")'; } elseif ($href === $text && $this->isValidAutolink($href)) { $markdown = '<' . $href . '>'; } elseif ($href === 'mailto:' . $text && $this->isValidEmail($text)) { $markdown = '<' . $text . '>'; } else { if (stristr($href, ' ')) { $href = '<'.$href.'>'; } $markdown = '[' . $text . '](' . $href . 
')'; } if (!$href) { $markdown = html_entity_decode($element->getChildrenAsString()); } return $markdown; } /** * @return string[] */ public function getSupportedTags() { return array('a'); } /** * @param string $href * * @return bool */ private function isValidAutolink($href) { return preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*/i', $href) === 1; } /** * @param string $email * * @return bool */ private function isValidEmail($email) { // Email validation is messy business, but this should cover most cases return filter_var($email, FILTER_VALIDATE_EMAIL); } } php-league-html-to-markdown-4.9.1/src/Converter/ListBlockConverter.php000066400000000000000000000007351360321300200257760ustar00rootroot00000000000000getValue() . "\n"; } /** * @return string[] */ public function getSupportedTags() { return array('ol', 'ul'); } } php-league-html-to-markdown-4.9.1/src/Converter/ListItemConverter.php000066400000000000000000000046271360321300200256460ustar00rootroot00000000000000config = $config; } /** * @param ElementInterface $element * * @return string */ public function convert(ElementInterface $element) { // If parent is an ol, use numbers, otherwise, use dashes $list_type = $element->getParent()->getTagName(); // Add spaces to start for nested list items $level = $element->getListItemLevel($element); $prefixForParagraph = str_repeat(' ', $level + 1); $value = trim(implode("\n" . $prefixForParagraph, explode("\n", trim($element->getValue())))); // If list item is the first in a nested list, add a newline before it $prefix = ''; if ($level > 0 && $element->getSiblingPosition() === 1) { $prefix = "\n"; } if ($list_type === 'ul') { $list_item_style = $this->config->getOption('list_item_style', '-'); $list_item_style_alternate = $this->config->getOption('list_item_style_alternate'); if (!isset($this->listItemStyle)) { $this->listItemStyle = $list_item_style_alternate ? 
$list_item_style_alternate : $list_item_style; } if ($list_item_style_alternate && $level == 0 && $element->getSiblingPosition() === 1) { $this->listItemStyle = $this->listItemStyle == $list_item_style ? $list_item_style_alternate : $list_item_style; } return $prefix . $this->listItemStyle . ' ' . $value . "\n"; } if ($list_type === 'ol' && $start = $element->getParent()->getAttribute('start')) { $number = $start + $element->getSiblingPosition() - 1; } else { $number = $element->getSiblingPosition(); } return $prefix . $number . '. ' . $value . "\n"; } /** * @return string[] */ public function getSupportedTags() { return array('li'); } } php-league-html-to-markdown-4.9.1/src/Converter/ParagraphConverter.php000066400000000000000000000057151360321300200260200ustar00rootroot00000000000000getValue(); $markdown = ''; $lines = preg_split('/\r\n|\r|\n/', $value); foreach ($lines as $line) { /* * Some special characters need to be escaped based on the position that they appear * The following function will deal with those special cases. */ $markdown .= $this->escapeSpecialCharacters($line); $markdown .= "\n"; } return trim($markdown) !== '' ? rtrim($markdown) . "\n\n" : ''; } /** * @return string[] */ public function getSupportedTags() { return array('p'); } /** * @param string $line * * @return string */ private function escapeSpecialCharacters($line) { $line = $this->escapeFirstCharacters($line); $line = $this->escapeOtherCharacters($line); $line = $this->escapeOtherCharactersRegex($line); return $line; } /** * @param string $line * * @return string */ private function escapeFirstCharacters($line) { $escapable = array( '>', '- ', '+ ', '--', '~~~', '---', '- - -' ); foreach ($escapable as $i) { if (strpos(ltrim($line), $i) === 0) { // Found a character that must be escaped, adding a backslash before return '\\' . 
ltrim($line); } } return $line; } /** * @param string $line * * @return string */ private function escapeOtherCharacters($line) { $escapable = array( '', 'Test'); $this->html_gives_markdown('

Test

', 'Test', array('strip_tags' => true)); } public function test_preserve_comments() { $this->html_gives_markdown('

Test

', "Test\n\n", array('preserve_comments' => true)); $this->html_gives_markdown('

Test

', "Test\n\n", array('preserve_comments' => array('more'))); $this->html_gives_markdown('

Test

', "Test\n\n", array('preserve_comments' => array('more'))); } public function test_preserve_whitespace() { $this->html_gives_markdown('
google.com test', '[google.com](google.com) `test`'); } public function test_delete_blank_p() { $this->html_gives_markdown('

', ''); $this->html_gives_markdown('

', '', array('strip_tags' => true)); } public function test_divs() { $this->html_gives_markdown('
Hello
World
', '
Hello
World
'); $this->html_gives_markdown('
Hello
World
', "Hello\n\nWorld", array('strip_tags' => true)); $this->html_gives_markdown("
Hello
\n
World
", "Hello\n\nWorld", array('strip_tags' => true)); $this->html_gives_markdown('

Paragraph

Hello
World
', "Paragraph\n\nHello\n\nWorld", array('strip_tags' => true)); } public function test_remove_nodes() { $this->html_gives_markdown('
Hello
World
', '', array('remove_nodes' => 'div')); $this->html_gives_markdown('

Hello

World', '', array('remove_nodes' => 'p span')); } public function test_html_entities() { $this->html_gives_markdown('

&euro;

', '&euro;'); $this->html_gives_markdown('<p>Some sample HTML</p>', '`

Some sample HTML

`'); } public function test_set_option() { $markdown = new HtmlConverter(); $markdown->getConfig()->setOption('strip_tags', true); $result = $markdown->convert('Strip'); $this->assertEquals('Strip', $result); } public function test_invoke() { $markdown = new HtmlConverter(); $markdown->getConfig()->setOption('strip_tags', true); $result = $markdown('Strip'); $this->assertEquals('Strip', $result); } public function test_sanitization() { $html = '
<script type = "text/javascript"> function startTimer() { var tim = window.setTimeout("hideMessage()", 5000) } </head> <body>
'; $markdown = '```' . "\n" . '