pax_global_header00006660000000000000000000000064127356622340014524gustar00rootroot0000000000000052 comment=bad0e8afc3809cb543a3f34068ac4b3160de86b3 Unicode-LineBreak-Unicode-LineBreak-2016.007_02/000077500000000000000000000000001273566223400207225ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/.gitignore000066400000000000000000000001271273566223400227120ustar00rootroot00000000000000MYMETA.json MYMETA.yml Makefile LineBreak.bs LineBreak.c LineBreak.o blib/ pm_to_blib Unicode-LineBreak-Unicode-LineBreak-2016.007_02/.gitmodules000066400000000000000000000002701273566223400230760ustar00rootroot00000000000000[submodule "test-data"] path = test-data url = https://github.com/hatukanezumi/sombok-test-data [submodule "sombok"] path = sombok url = https://github.com/hatukanezumi/sombok.git Unicode-LineBreak-Unicode-LineBreak-2016.007_02/ARTISTIC000066400000000000000000000144631273566223400220770ustar00rootroot00000000000000The "Artistic License" Preamble The intent of this document is to state the conditions under which a Package may be copied, such that the Copyright Holder maintains some semblance of artistic control over the development of the package, while giving the users of the package the right to use and distribute the Package in a more-or-less customary fashion, plus the right to make reasonable modifications. Definitions "Package" refers to the collection of files distributed by the Copyright Holder, and derivatives of that collection of files created through textual modification. "Standard Version" refers to such a Package if it has not been modified, or has been modified in accordance with the wishes of the Copyright Holder as specified below. "Copyright Holder" is whoever is named in the copyright or copyrights for the package. "You" is you, if you're thinking about copying or distributing this Package. "Reasonable copying fee" is whatever you can justify on the basis of media cost, duplication charges, time of people involved, and so on. (You will not be required to justify it to the Copyright Holder, but only to the computing community at large as a market that must bear the fee.) "Freely Available" means that no fee is charged for the item itself, though there may be fees involved in handling the item. It also means that recipients of the item may redistribute it under the same conditions they received it. 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may apply bug fixes, portability fixes and other modifications derived from the Public Domain or from the Copyright Holder. A Package modified in such a way shall still be considered the Standard Version. 3. You may otherwise modify your copy of this Package in any way, provided that you insert a prominent notice in each changed file stating how and when you changed that file, and provided that you do at least ONE of the following: a. place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or placing the modifications on a major archive site such as uunet.uu.net, or by allowing the Copyright Holder to include your modifications in the Standard Version of the Package. b. use the modified Package only within your corporation or organization. c. rename any non-standard executables so the names do not conflict with standard executables, which must also be provided, and provide a separate manual page for each non-standard executable that clearly documents how it differs from the Standard Version. d. make other distribution arrangements with the Copyright Holder. You may distribute the programs of this Package in object code or executable form, provided that you do at least ONE of the following: a. distribute a Standard Version of the executables and library files, together with instructions (in the manual page or equivalent) on where to get the Standard Version. b. accompany the distribution with the machine-readable source of the Package with your modifications. c. give non-standard executables non-standard names, and clearly document the differences in manual pages (or equivalent), together with instructions on where to get the Standard Version. d. make other distribution arrangements with the Copyright Holder. You may charge a reasonable copying fee for any distribution of this Package. You may charge any fee you choose for support of this Package. You may not charge a fee for this Package itself. However, you may distribute this Package in aggregate with other (possibly commercial) programs as part of a larger (possibly commercial) software distribution provided that you do not advertise this Package as a product of your own. You may embed this Package's interpreter within an executable of yours (by linking); this shall be construed as a mere form of aggregation, provided that the complete Standard Version of the interpreter is so embedded. The scripts and library files supplied as input to or produced as output from the programs of this Package do not automatically fall under the copyright of this Package, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this Package. If such scripts or library files are aggregated with this Package via the so-called "undump" or "unexec" methods of producing a binary executable image, then distribution of such an image shall neither be construed as a distribution of this Package nor shall it fall under the restrictions of Paragraphs 3 and 4, provided that you do not represent such an executable image as a Standard Version of this Package. C subroutines (or comparably compiled subroutines in other languages) supplied by you and linked into this Package in order to emulate subroutines and variables of the language defined by this Package shall not be considered part of this Package, but are the equivalent of input as in Paragraph 6, provided these subroutines do not change the language in any way that would cause it to fail the regression tests for the language. Aggregation of this Package with a commercial distribution is always permitted provided that the use of this Package is embedded; that is, when no overt attempt is made to make this Package's interfaces visible to the end user of the commercial distribution. Such use shall not be construed as a distribution of this Package. The name of the Copyright Holder may not be used to endorse or promote products derived from this software without specific prior written permission. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. The End Unicode-LineBreak-Unicode-LineBreak-2016.007_02/Changes000066400000000000000000000344361273566223400222270ustar00rootroot000000000000002016.007_02 Sat Jul 02 2016 ! lib/Text/LineFold.pm ! t/04fold.t - CPAN RT#115146: Space indenting not happening with "From" or " ". 2016.003 Tue Mar 29 2016 # No new features, version number as canonical numstring. 2016.002_27 Sat Feb 27 2016 ! Makefile.PL.sombok - CPAN RT#112078: Compiling libsombok.a fails due to invalid path for object files. 2015.12 Sun Nov 29 2015 # No new features. 2015.011_09 Mon Nov 09 2015 ! LineBreak.xs - uvnui_to_utf8() was obsoleted by Perl 5.20.0. Use uvchr_to_utf8(). 2015.011_03 Sun Nov 03 2015 ! LineBreak.xs - Bug fix: CPAN RT #106714: Unicode::GCString fails to handle numeric value, mostly on *BSD and Mac OS X. ! t/10gcstring.t - Adding more case. 2015.11 Sun Nov 01 2015 # No new features. ! lib/Unicode/LineBreak.pm - CPAN RT #106859: The latest version is not indexed. ! t/10gcstring.t - testing that GCString stringify numeric arguments (CPAN RT #106714). 2015.07.16 Fri Jul 17 2015 # No new features. ! LineBreak.xs - utf8_to_uvuni_buf has been deprecated by Perl 5.19.4. 2015.06 Sun Jun 21 2015 # Unicode 8.0.0, bundled sombok 2.4.0. ! lib/Unicode/LineBreak.pod ! lib/POD2/JA/Unicode/LineBreak.pod ! t/18currency.y - add a change for U+20BE. 2014.06 Fri Jun 20 2014 # Unicode 7.0.0, bundled sombok 2.3.2. 2014.004_26 Sat May 04 2014 # Unicode 7.0.0beta at Mar 18, bundled sombok-2.3.2beta1. ! lib/Unicode/LineBreak.pod ! lib/POD2/JA/Unicode/LineBreak.pod ! t/18currency.t - add a change for U+20BB. 2013.11 Thu Nov 21 2013 # No new features. - debian/ - Removed becuase it was not maintained. ! lib/Unicode/GCString.pm - I forgot to bump up version to 2013.10. ! lib/POD2/JA/Text/LineFold.pod ! lib/POD2/JA/Unicode/GCString.pod ! lib/POD2/JA/Unicode/LineBreak.pod ! lib/Text/LineFold.pm ! lib/Unicode/GCString.pod ! lib/Unicode/LineBreak.pod ! Makefile.PL - Repository has been moved to GitHub. 2013.10 Fri Oct 04 2013 # Unicode 6.3.0, bundled sombok-2.3.1. ! Makefile.PL ! Makefile.PL.sombok - Added include path of libthai to sombok/Makefile.PL for such as Mac OS X. ! perl-Unicode-LineBreak.spec - mv POD2::JA manpages into %{_mandir}/ja/. 2013.008_06 Sun Aug 25 2013 + lib/POD2/JA/Text/LineFold.pod + lib/POD2/JA/Unicode/GCString.pod + lib/POD2/JA/Unicode/LineBreak.pod - lib/Text/LineFold/JA_JP.pod - lib/Unicode/GCString/JA_JP.pod - lib/Unicode/LineBreak/JA_JP.pod - Moved Japanese PODs to suit POD2 feature. ! lib/POD2/JA/Unicode/GCString.pod ! lib/Unicode/GCString.pod - Commented-out "Methods planned to be deprecated". ! lib/POD2/JA/Unicode/LineBreak.pod ! lib/Unicode/LineBreak.pod - Addition about IDEOGRAPHIC SPACE. - Added description about predefined property values for currency symbols. - Commented-out "Methods Planned to be Deprecated" and "Obsoleted Options". ! lib/Unicode/LineBreak/Constants.pm - A new constant IDEOGRAPHIC_SPACE. ! Makefile.PL - Now "make dist" generates META files. + t/18currency.t - On reserved codepoints for currency symbols. 2013.004_26 Sat Apr 27 2013 ! Makefile.PL - Use $(NOOP) instead of true, for Windows. - Abort make if older libthai was found. ! Linebreak.xs ! typemap - Decode arguments without utf8 flag as ISO-8859-1: CPAN RT #84661. ! t/17prop.t - Added tests for non-utf8-flagged arguments. 2013.003_11 Mon Mar 25 2013 # Unicode 6.3.0 beta at 2013-03-02, bundled sombok-2.3.1b. 2012.10 Mon Oct 01 2012 # Unicode 6.2.0, bundled sombok-2.3.0. ! lib/Unicode/LineBreak/JA_JP.pod ! lib/Unicode/LineBreak.pod - Undocumented EA_ZA and EA_ZW introduced by sombok-2.3.0. - Added a note to state EA_Z* are not a part of Standard. ! lib/Unicode/LineBreak.pm - added warning for obsoleted options: TailorEA, TailorLB and UserBreaking. ! t/03ns.t ! t/06context.t ! t/09uri.t - Removed tests for obsoleted options. 2012.008_16 Sat Aug 18 2012 ! lib/Unicode/LineBreak.pm ! lib/Unicode/LineBreak.pod ! lib/Unicode/LineBreak/JA_JP.pod ! t/00GraphemeBreakTest.t ! t/00LineBreakTest.t - Updates for latest 6.2.0beta. - Bundled sombok-2.3.0gamma1. 2012.007_16 Thu Jul 12 2012 ! Makefile.PL: - Use sombok >= 2.3.0. ! t/00LineBreakTest.t: - Temporarily skip problematic ZJ subtests. ! lib/Unicode/GCString.pod: ! lib/Unicode/GCString/JA_JP.pod: - Updated citation. ! lib/Unicode/LineBreak.pod: ! lib/Unicode/LineBreak/JA_JP.pod: - Updated citation. - Added a section about tailoring for IDEOGRAPHIC SPACE (not yet written). See , by Koji Ishii. 2012.007_14 Thu Jul 12 2012 # Not really released 2012.06 Sat Jun 02 2012 ! lib/Unicode/LineBreak/Defaults.pm.sample: - removed obsoleted options. ! Makefile.PL: ! META.json: - fixed broken {'meta-spec'}{'version'}. 2012.005_18 Sat May 26 2012 ! lib/Unicode/LineBreak.pod: ! lib/Unicode/LineBreak/JA_JP.pod: - Added "Incompatible Changes" section. ! LineBreak.xs: - Fix for all versions of Perl: CPAN RT #77394. utf8_to_uvuni has been deprecated by Perl 5.16 since it may read out of buffer. - Chg: LineBreak: eawidth() and lbclass() were removed. ! Makefile.PL: ! META.yml: - requires ExtUtils::MakeMaker >= 6.26. + META.json: - added. + t/000.t: - added. ! t/01break.t: ! t/08partial.t: - Added ko-decomp test. 2012.04 Sun Apr 01 2012 ! lib/Unicode/GCString.pod: ! lib/Unicode/GCString/JA_JP.pod: - Let flag(), lbclass() and lbclass_ext() be deprecated. ! lib/Unicode/LineBreak.pod: ! lib/Unicode/LineBreak/JA_JP.pod: - Let eawidth(), lbrule() and lbclass() be deprecated. ! LineBreak.xs: - New: GCString::lbc(), GCString::lbcext(). ! Makefile.PL: - Fix: Win32 etc.: Include XSUB.h in sombok.h to avoid mismatches of malloc implementations, when bundled sombok is used. - Requires sombok >= 2.2.0. 2012.003_30 Tue Mar 20 2012 # Mainly fixes of bugs emited by previous development release. ! lib/Unicode/GCString.pod: ! lib/Unicode/GCString/JA_JP.pod: - Added a caveat about calling the grapheme cluster as "grapheme". ! lib/Unicode/LineBreak.pod: ! lib/Unicode/LineBreak/JA_JP.pod: - some corrections. ! LineBreak.xs: - Fix: Win32/MSVC: lack of strcasecmp(). ! Makefile.PL: ! Makefile.PL.sombok: - Fix: error on test-subdirs with *BSD make. 2012.003_26 Tue Mar 13 2012 # Tests building on Win32 are appreciated. ! lib/Text/LineFold.pm: - use breakingRule(). ! LineBreak.xs: - Cast ref_func() according to strict prototyping. - New: LineBreak::breakingRule(). lbrule() may be deprecated in near future. ! Makefile.PL - requires sombok 2.1.1. - Now configure script won't be used to build bundled sombok. - Fix: libthai cflags were ommitted. - get UC test files only when they do not exist. + Makefile.PL.sombok - To make bundled sombok static library. - sombok/configure, etc. - No longer shipped with full package of sombok. + t/00GraphemeBreakTest.t - Added test suite provided by Unicode Consortium. - Cases including single surrogates will be skipped. 2012.003_13 Mon Mar 12 2012 - Withdrawn 2012.003_11 Mon Mar 12 2012 - Withdrawn 2012.02 Sat Feb 04 2012 ! t/01break.t - Added Sanskrit case. 2012.001_29 Sun Jan 29 2012 # trunk, Unicode data 6.1.0 beta. ! LineBreak.xs - Bug Fix: lbclass() & eawidth() return wrong values for strings with single non-ASCII byte, e.g. lbclass("\x{A0}") returns such as LB_H3. - Chg: CharMax may be 0 (unlimited). NOTE: this feature has not been tested enough. - Added new option ViramaAsJoiner. ! Makefile.PL - Needs Sombok >= 2.1.0beta2. - fix: Solaris sh blames test with ``-e''. Use ``-f''. ! lib/Unicode/LineBreak.pm - Chg: New option ViramaAsJoiner which is by default "YES". ! lib/Unicode/LineBreak.pod ! lib/Unicode/LineBreak/JA_JP.pod - Update documentation: - Added descriptions about LBC CJ & HL. - Modify citation: [UAX #11] really had not been revised after rev. 19 (2009). - Added citation for [JLREQ]. ! t/00LineBreakText.t - skip empty data. + t/17prop.h - added for fix on LineBreak.xs 2011.12 Sat Dec 17 2011 # Backport from trunk with Unicode data 6.0.0. ! LineBreak.xs - Bug Fix: lbclass() & eawidth() return wrong values for strings with single non-ASCII byte, e.g. lbclass("\x{A0}") returns LB_H3. + t/17prop.t - added for fix on LineBreak.xs 2011.11 Tue Nov 01 2011 # Backport from trunk with Unicode data 6.0.0. + debian/* - Added Debian packaging info at Debian sid, packaged by Emmanuel Bouthenot. ! GPL - Street address of FSF: CPAN RT #69999. ! lib/Text/LineFold.pm: - Bump up version to 2011.10. - _is_indirect(): Simplified as enhancement of lbrule(). ! lib/Unicode/LineBreak.pod ! lib/Unicode/LineBreak/JA_JP.pod - Some updates. ! Makefile.PL - Bundle LineBreakTest.txt in dist for test #00. ! META.yml - Quote abstract containing `#'. ! sombok/* - Bundled Sombok 2.0.6. ! t/00LineBreakText.t - Now all tests shall be past. - Add test names. ! t/01break.t - Added Hebrew (he) test. 2011.010_26 Wed Oct 26 2011 + debian/* - Added Debian packaging info at Debian sid, packaged by Emmanuel Bouthenot. ! lib/Text/LineFold.pm: - Bump up version to 2011.10. - _is_indirect(): Simplified as enhancement of lbrule(). ! LineBreak.xs: - lbrule(): Use new linebreak_get_lbrule() instead of linebreak_lbrule(). ! Makefile.PL - Try getting LineBreakTest.txt for test #00. ! t/00LineBreakText.t - Now all tests shall be past. ! sombok/* - Bundled Sombok 2.0.6beta2. 2011.010_21 Fri Oct 21 2011 ! GPL - Street address of FSF: CPAN RT #69999. ! lib/Unicode/LineBreak.pod ! lib/Unicode/LineBreak/JA_JP.pod - Some updates. ! Makefile.PL - Requires Sombok >= 2.0.6. ! META.yml - Quote abstract containing `#'. ! t/00LineBreakTest.t - Add test names. ! t/01break.t - Added Hebrew (he) test. 2011.05 Sun May 01 2011 ! Makefile.PL - Pass (full) names of ar & ranlib to configure: link fails if they were not found in PATH (e.g. Solaris; ar is under /usr/ccs/bin). - New versioning scheme of Sombok. See sombok/ChangeLog. ! lib/Text/LineFold/JA_JP.pod - typo. ! perl-Unicode-LineBreak.spec - Update %{sombok_version}. increased epoch. ! t/07sea.t ! t/08partial.t - Skip tests with older libthai: see CPAN RT #61922. ! t/16regex.t - Skip tests when perlbug #82302 was detected (Perl 5.13.8 around 300-388). 2011.04.26 Sat Apr 16 2011 ! lib/Text/LineFold.pm ! lib/Text/LineFold/JA_JP.pod - Small fixes. ! lib/Unicode/LineBreak.pod ! lib/Unicode/LineBreak/JA_JP.pod - Added example usage of hashref override. Small fixes. ! sombok - Update to 2011.5RC: Fix CPAN RT #67505 Long URLs cause infinite loop. ! MANIFEST - Eliminate sombok/data and some of sombok/src/* from CPAN distribution. To get full package of Sombok see . 2011.04 Fri Apr 01 2011 + perl-Unicode-LineBreak.spec - RPM spec file. ! LineBreak.xs - New constant SOMBOK_VERSION. ! Makefile.PL - requires sombok >= 2011.4: compilation failed by Solaris cc. 2011.03.21 Mon Mar 21 2011 ! Makefile.PL - MYEXTLIB: avoid compilation failure due to parallel execution by make. ! lib/Unicode/LineBreak.pod ! lib/Unicode/LineBreak/JA_JP.pod - Doc: U+3xxxx (a.k.a. Tertiary Ideographic Plane) contains old hanzi. 2011.003_13 Sun Mar 13 2011 ! lib/Unicode/GCString/JA_JP.pod ! lib/Unicode/LineBreak.pod ! lib/Unicode/LineBreak/JA_JP.pod - Update REFERENCES. ! Makefile.PL - removed sombok/configure dependency. - added dist PREOP to correct timestamp of files included into distribution. - possible fix for RT #61922, about libthai. Requires sombok >= 2011.3. 2011.03.05 Sat Mar 05 2011 ! lib/Unicode/LineBreak.pm - Chg: CharactersMax, ColumnsMax, ColumnsMin, SizingMethod & UrgentBreaking are renamed to CharMax, ColMax, ColMin, Sizing & Urgent, respectively. Old names may also be used. ! LineBreak.xs - Bug Fix: Unicode::LineBreak: Cannot update config from inside Perl callbacks. - Fix: Unicode::LineBreak::_config(): Suppress ``Use of uninitialized value in subroutine entry'' warning. - Imp: Unicode::GCString::substr(): replacement may be also Unicode string. - Imp: Functions croak using strerror(). - Imp: Now Unicode::LineBreak & Unicode::GCString are SvREADONLY_on()'ed. ! typemap - Unify maps for unistr_t * and gcstring_t *. - Added notes. ! t/00LineBreakTest.t - Simplified code. ! t/lf.pl ! t/04fold.t - Added tests. ! lib/Unicode/LineBreak.pod ! lib/Unicode/LineBreak/JA_JP.pod - Updated. 2011.03 Sat Mar 05 2011 - Withdrawn. 2011.002_26 Sat Feb 26 2011 ! lib/Unicode/LineBreak/Constants.pm - ``LEFT_'' and ``RIGHT_'' are inappropriate on right-to-left contexts. Use ``BACKWORD_'' and ``FORWARD_'' instead. ! lib/Unicode/LineBreak.pm - Moved codes for custom property maps to XS. - Chg: TailorEA & TailorLB options are obsoleted. Use EAWidth & LBClass options instead. ! LineBreak.xs - Removed _loadmap(): use linebreak_update_*(). - Removed _propvals(): Added EAWidths() and LBClasses() to get prop. vals. - Fix: Stash (instance of hashref) was not allocated at construction time. - Most of conversions between C and Perl are moved to typemap. ! typemap - Added typemap for linebreak_t *, gcstring_t *, unistr_t *, generic string etc. ! t/03ns.t - Added tests for obsoleted TailorLB option. ! t/06context.t - Added tests for obsoleted TailorEA option. ! Makefile.PL - Requires sombok >= 2011.2beta1. 2011.002_19 Sat Feb 19 2011 * Update ppport.h to 3.19. * Most of config() codes are moved to XS. * Cleanup handling of REGEXP in XS. 2011.002_11 Fri Feb 11 2011 * Imp: regex matching are moved to XS. * Chg: instead of UserBreaking option, use Prep option. * Requires Sombok >= 2011.1RC. 2011.01 Thu Jan 20 2011 * New major release. * Requires Sombok >= 2011.0. Important Changes beside Unicode-LineBreak-1.x: * 1.x had memory leaks & potantial buffer overrun vulnerabilities. Switching to release 20XX is strongly recommended. * Major changes of public interface: - Format callback name "DEFAULT" was deprecated. Use "SIMPLE". - SizingMethod callback name "DEFAULT" was deprecated. Use "UAX11". - SizingMethod callback with extra 6th argument was deprecated. - UrgentBreaking callback name "NONBREAK" was deprecated. Use undef. - UrgentBreaking callback MUST take two arguments, NOT five. - In array context, break() and break_partial() return an array of lines. * Linebreak library package was renamed to Sombok. About change history until release 1.x see Changes.REL1 file. Local Variables: mode: change-log change-log-default-name: "Changes" tab-width: 2 left-margin: 2 End: Unicode-LineBreak-Unicode-LineBreak-2016.007_02/Changes.REL1000066400000000000000000000162271273566223400227270ustar00rootroot000000000000002011.0 Xxx Xxx XX XXXX * Imp: Added ComplexBreaking option to choose SA complex breaking will be performed or not (if it is suppoted). * Added test #14; skipping test #7 on non-SA build. * Chg: Changes of public interface: - Format callback name "DEFAULT" was deprecated. Use "SIMPLE". - SizingMethod callback name "DEFAULT" was deprecated. Use "UAX11". - SizingMethod callback with extra 6th argument was deprecated. - UrgentBreaking callback name "NONBREAK" was deprecated. Use undef. - UrgentBreaking callback should take two arguments, not five. * Imp: In array context, break() and break_partial() return an array of lines. * Chg: Linebreak library package was renamed to Sombok. * Requires Sombok >= 2011.0. * Doc: some typos. * Doc: Unicode::GCString: - split POD and code. - added docs on undocumented things. 1.011 Sat Jan 01 2011 * Updated linebreak library (improved conformance to UAX #14). * Added BreakIndent option that defaults to "YES". * Requires linebreak >= 1.10.0 * Added test #0 (partially failed). 1.010 Wed Dec 29 2010 * New linebreak library (1.9; fixed broken pair table). * Added constants for some ambiguous quotation marks (QU). * Doc: some modifications 1.008.2 Tue Nov 16 2010 * Fixed linebreak library (only for Win32). 1.008.1 Mon Nov 01 2010 - Withdrawn 1.008 Mon Oct 11 2010 * By now linebreak library will be maintained independently. See repository . * Removed Version.pm. * Needs linebreak >= 1.8.1. * MSWin32+MinGW support (testing). * Doc: Unicode::GCString: small fix. 1.008_02 Sat Sep 18 2010 * Update UCD to 6.0.0beta at Sep 1. * Needs linebreak >= 1.8. 1.008_01 Tue Aug 10 2010 * Update UCD to 6.0.0beta at Jun 21. * Check if libthai is broken on compile time. * Small fixes on docs. 1.007.520 Wed Dec 30 2009 * Fix: Text::LineFold: Newline option doesn't have effect for fold() second style. * New: Text::LineFold::unfold: ``FLOWEDSP'' method for DelSp=No. * Imp: Unicode::GCString: New undocumented method lbclass_ext(). * Imp: Unicode::GCString: lbclass() and lbclass_ext() allow negative arguments. * Fix: non-CM characters in grapheme extender were ignored. * Requires linebreak 1.5. * removed unused data/rules2pl.pl. * Added test #13. * Some typos. 1.006.520 Sat Dec 05 2009 * Fix: Update VERSION for Text::LineFold. * Fix: Text::LineFold: non-Unicode was not allowed for Newline option. * Doc: Small fixes. 1.005.520 Mon Nov 30 2009 * Imp: Text::LineFold::fold(): Second calling style to ease transition from Text::Wrap::wrap(). * Chg: Text::LineFold::fold() considers tab stops: Added new option TabSize. * Doc: Fixed example of SizingMethod option to count spaces at beginning of lines. * Doc: Some typos. * Makefile.PL: Support for separate linebreak library (incompleted). * Added test #12. 1.004.520 Sun Oct 18 2009 * Imp: XS'ized built-in Format & UrgentBreak functions. 5-10% faster. * Doc: Added an example of SizingMethod option. * Doc: Added notes for utf8_mg_pos_cache_update panic. * Added tests for Format & UrgentBreak options. * Requires linebreak 1.4.x. 1.003.520 Thu Oct 15 2009 * NOTE: For changes on linebreak library see linebreak/ChangeLog. * Fix: Makefile.PL: fix for dmake+MSWin32 (hopefully). * Fix: LineBreak.xs: UTF8_MAXBYTES exists as of Perl>=5.8.8. Use UTF8_MAXLEN. * Fix: URGENT_BREAKING_FUNCS{FORCE}: Workaround for Perl5 RT #69422 - utf8_mg_pos_cache_update panic - Perl 5.10.1 & (maybe) 5.10.0. * Imp: LineBreak.xs: Check data type of self argument. * Imp: preprocess() get Perl (utf8) string then returns array. Faster. * Imp: XS'ized break(). * New: Unicode::GCString::join(). * Added test for Format feature. * Doc: Small fixes. 1.003_11 Sun Oct 11 2009 * Imp: Almost full XS version. Isolated pure-C codes to linebreak library. * Chg: Update Unicode data to version 5.2.0. * Doc: Small fix. 1.003_09 Sun Oct 04 2009 * Transitional release - NOTE that this release is VERY slow. * Imp/Chg: Custom functions (for sizing, user breaking, urgent breaking and format) accepts grapheme cluster string. * Cleanup: separate C codes. * Doc: Added more example. * Doc: Added link to CPAN RT page. * Doc: Added a bug to BUGS section. 1.003_06 Sat Sep 12 2009 * Fix: Get back LB9: Some CM characters may be single grapheme cluster as their Grapheme_Cluster_Break property is Control. * Imp: Almost XS'ized grapheme cluster processing. Approx. 25% faster. 1.003_05 Thu Sep 10 2009 * Chg: Include suffixing ``URL:'' to URI pattern. * Imp: Efficient user-breaking. Approx. 10% faster. 1.003_03 Sat Sep 06 2009 * Chg: Deprecated non-XS version. * Imp: XS'ized character data. Implemented hash index for searching. Approx. 10% faster in speed, approx. 40KB more in memory usage. * Removed ranges of trivial entries (e.g. XX on lbmap), reserved for CJK ideographs etc. from character data maps. * Update Unicode data for 5.2.0beta to those of Aug 24. * Doc: Fixed attributes for some UCS ranges. Correct several typos. 1.002.510 Sun Jun 21 2009 * Chg: Apply CMOS rules to "BREAKURI". 1.000.510 Sat Jun 20 2009 * Fix: broken testin/th.in * Imp: Added QUESTIONABLE_NARROW_SIGNS constant. * Chg: $UNICODE_VERSION to constant UNICODE_VERSION * Fix: Sync options of Text::LineFold->new() with base class. 1.000_02 Sat Jun 13 2009 * Fix: Correct Thai (th) tests. * Fix: Unassigned code points are given N, not A (except those of SIP are W). * Imp: strsize(): Now texts are broken at boundaries of graphame cluster. * Chg: SA characters are resolved by Grapheme_Cluster_Break property, as some suffixing vowels have General_Category Lo and some others have Grapheme_Cluster_Break Prepend property. * Added tests for incremental input. * Chg: Remove @LB_CLASSES which may not be used; * Chg: Remove NarrowAL and NSKanaAsID options: use TailorEA/TailorLB options. 1.000_01 Sat May 30 2009 * Fix: broken non-XS strsize(). * New: Added Thai word segmentation using libthai. 0.005.510 Fri May 29 2009 * Chg: renamed functions: remove ``get-''. * New: break_partial() method. * New: eawidth() method, XS'ized. * Imp: XS'ized strsize(). * Fix: lbclass(): use SV* instead of char*. * Fix: Mc characters aren't nonspacing: Mn, Me, Cc, Cf, Zl and Zp are nonspacing. * Cleanup codes and PODs. 0.004.510 Sat May 23 2009 * New: method getstrsize(). * Imp: XS'ize getlbclass(). * Doc: separate POD. Added a few examples. 0.003.510 Thu May 21 2009 * Fix: Perl 5.6.x - skip tests with older POD::Simple. * Imp: remove \p{...} pattern matchings slower than substr(). 0.002.510 Sun May 17 2009 * Partial XS support. * lib/Unicode/LineBreak.pm: Added second format of SizingMethod subroutines. 0.001.510 Sun May 10 2009 * No new features. Small fixes on documentation. 0.001 Sat May 09 2009 * 0.001 release. - Added urgent/custom breaking features. - Added tests. 0.001_11 Sat May 02 2009 * 0.001beta. Added Text::LineFold module. 0.001_03 Wed Apr 29 2009 * alpha. 0.001_02 Sun Apr 26 2009 * pre-alpha. 0.001_01 Sat Apr 18 2009 * pre-alpha. Local Variables: mode: change-log change-log-default-name: "Changes" tab-width: 2 left-margin: 2 End: Unicode-LineBreak-Unicode-LineBreak-2016.007_02/GPL000066400000000000000000000432541273566223400212770ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. Unicode-LineBreak-Unicode-LineBreak-2016.007_02/LineBreak.xs000066400000000000000000001121661273566223400231410ustar00rootroot00000000000000/* * LineBreak.xs - Perl XS glue for Sombok package. * * Copyright (C) 2009-2013 Hatuka*nezumi - IKEDA Soji . * * This file is part of the Unicode::LineBreak package. This program is * free software; you can redistribute it and/or modify it under the same * terms as Perl itself. * * $Id$ */ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #define NEED_newRV_noinc #define NEED_sv_2pv_flags #define NEED_newRV_noinc_GLOBAL #define NEED_sv_2pv_flags_GLOBAL #define NEED_sv_2pv_nolen #include "ppport.h" #include "sombok.h" /* for Win32 with Visual Studio (MSVC) */ #ifdef _MSC_VER # ifndef snprintf # define snprintf _snprintf # endif /* snprintf */ # define strcasecmp _stricmp #endif /* _MSC_VER */ /* Type synonyms for typemap. */ typedef IV swapspec_t; typedef gcstring_t *generic_string; /*** *** Data conversion. ***/ /* * Create Unicode string from Perl utf8-flagged string. */ static unistr_t *SVtounistr(unistr_t *buf, SV *str) { U8 *utf8, *utf8ptr; STRLEN utf8len, unilen, len; unichar_t *uniptr; if (buf == NULL) { if ((buf = malloc(sizeof(unistr_t))) == NULL) croak("SVtounistr: %s", strerror(errno)); } else if (buf->str) free(buf->str); buf->str = NULL; buf->len = 0; if (SvOK(str)) utf8 = (U8 *)SvPV(str, utf8len); else return buf; if (utf8len <= 0) return buf; unilen = utf8_length(utf8, utf8 + utf8len); if ((buf->str = (unichar_t *)malloc(sizeof(unichar_t) * unilen)) == NULL) croak("SVtounistr: %s", strerror(errno)); utf8ptr = utf8; uniptr = buf->str; while (utf8ptr < utf8 + utf8len) { #if PERL_VERSION >= 20 || (PERL_VERSION == 19 && PERL_SUBVERSION >= 4) *uniptr = (unichar_t) NATIVE_TO_UNI( utf8_to_uvchr_buf(utf8ptr, utf8 + utf8len, &len)); #elif PERL_VERSION >= 16 || (PERL_VERSION == 15 && PERL_SUBVERSION >= 9) *uniptr = (unichar_t) utf8_to_uvuni_buf(utf8ptr, utf8 + utf8len, &len); #else *uniptr = (unichar_t) utf8n_to_uvuni(utf8ptr, utf8 + utf8len - utf8ptr, &len, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); #endif if (len < 0) { free(buf->str); buf->str = NULL; buf->len = 0; croak("SVtounistr: Not well-formed UTF-8"); } if (len == 0) { free(buf->str); buf->str = NULL; buf->len = 0; croak("SVtounistr: Internal error"); } utf8ptr += len; uniptr++; } buf->len = unilen; return buf; } /* * Create Unicode string from Perl string NOT utf8-flagged. */ static unistr_t *SVupgradetounistr(unistr_t *buf, SV *str) { char *s; size_t len, i; if (buf == NULL) { if ((buf = malloc(sizeof(unistr_t))) == NULL) croak("SVupgradetounistr: %s", strerror(errno)); } else if (buf->str) free(buf->str); buf->str = NULL; buf->len = 0; if (SvOK(str)) s = SvPV(str, len); else return buf; if (len == 0) return buf; if ((buf->str = malloc(sizeof(unichar_t) * len)) == NULL) croak("SVupgradetounistr: %s", strerror(errno)); for (i = 0; i < len; i++) buf->str[i] = (unichar_t)(unsigned char)s[i]; buf->len = len; return buf; } /* * Create Perl utf8-flagged string from Unicode string. */ static SV *unistrtoSV(unistr_t *unistr, size_t uniidx, size_t unilen) { U8 *buf = NULL, *newbuf; STRLEN utf8len; unichar_t *uniptr; SV *utf8; if (unistr == NULL || unistr->str == NULL || unilen == 0) { utf8 = newSVpvn("", 0); SvUTF8_on(utf8); return utf8; } utf8len = 0; uniptr = unistr->str + uniidx; while (uniptr < unistr->str + uniidx + unilen && uniptr < unistr->str + unistr->len) { if ((newbuf = realloc(buf, sizeof(U8) * (utf8len + UTF8_MAXLEN + 1))) == NULL) { free(buf); croak("unistrtoSV: %s", strerror(errno)); } buf = newbuf; #if PERL_VERSION >= 20 || (PERL_VERSION == 19 && PERL_SUBVERSION >= 4) utf8len = uvchr_to_utf8(buf + utf8len, UNI_TO_NATIVE(*uniptr)) - buf; #else utf8len = uvuni_to_utf8(buf + utf8len, *uniptr) - buf; #endif uniptr++; } utf8 = newSVpvn((char *)(void *)buf, utf8len); SvUTF8_on(utf8); free(buf); return utf8; } /* * Convert Perl object to C object */ #define PerltoC(type, arg) \ (INT2PTR(type, SvIV((SV *)SvRV(arg)))) /* * Create Perl object from C object */ # define setCtoPerl(arg, klass, var) \ STMT_START { \ sv_setref_iv(arg, klass, (IV)(var)); \ SvREADONLY_on(arg); \ } STMT_END static SV *CtoPerl(char *klass, void *obj) { SV *sv; sv = newSViv(0); setCtoPerl(sv, klass, obj); return sv; } /* * Convert Perl utf8-flagged string (GCString) to grapheme cluster string. */ static gcstring_t *SVtogcstring(SV *sv, linebreak_t *lbobj) { unistr_t unistr = {NULL, 0}; if (!sv_isobject(sv)) { SVtounistr(&unistr, sv); return gcstring_new(&unistr, lbobj); } else if (sv_derived_from(sv, "Unicode::GCString")) return PerltoC(gcstring_t *, sv); else croak("Unknown object %s", HvNAME(SvSTASH(SvRV(sv)))); } #if 0 /* * Convert Perl LineBreak object to C linebreak object. */ static linebreak_t *SVtolinebreak(SV *sv) { if (!sv_isobject(sv)) croak("Not object"); else if (sv_derived_from(sv, "Unicode::LineBreak")) return PerltoC(linebreak_t *, sv); else croak("Unknown object %s", HvNAME(SvSTASH(SvRV(sv)))); } #endif /* 0 */ /* * Convert Perl SV to boolean (n.b. string "YES" means true). */ static int SVtoboolean(SV *sv) { char *str; if (!sv || !SvOK(sv)) return 0; if (SvPOK(sv)) return strcasecmp((str = SvPV_nolen(sv)), "YES") == 0 || atof(str) != 0.0; return SvNV(sv) != 0.0; } /*** *** Other utilities ***/ /* * Do regex match once then returns offset and length. */ void do_pregexec_once(REGEXP *rx, unistr_t *str) { SV *screamer; char *str_arg, *str_beg, *str_end; size_t offs_beg, offs_end; screamer = unistrtoSV(str, 0, str->len); SvREADONLY_on(screamer); str_beg = str_arg = SvPVX(screamer); str_end = SvEND(screamer); if (pregexec(rx, str_arg, str_end, str_beg, 0, screamer, 1)) { #if PERL_VERSION >= 11 offs_beg = ((regexp *)SvANY(rx))->offs[0].start; offs_end = ((regexp *)SvANY(rx))->offs[0].end; #elif ((PERL_VERSION == 10) || (PERL_VERSION == 9 && PERL_SUBVERSION >= 5)) offs_beg = rx->offs[0].start; offs_end = rx->offs[0].end; #else /* PERL_VERSION */ offs_beg = rx->startp[0]; offs_end = rx->endp[0]; #endif str->str += utf8_length((U8 *)str_beg, (U8 *)(str_beg + offs_beg)); str->len = utf8_length((U8 *)(str_beg + offs_beg), (U8 *)(str_beg + offs_end)); } else str->str = NULL; SvREFCNT_dec(screamer); } /*** *** Callbacks for Sombok library. ***/ /* * Increment/decrement reference count */ void ref_func(void *sv, int datatype, int d) { if (sv == NULL) return; if (0 < d) SvREFCNT_inc((SV *)sv); else if (d < 0) SvREFCNT_dec((SV *)sv); } /* * Call preprocessing function */ static gcstring_t *prep_func(linebreak_t *lbobj, void *dataref, unistr_t *str, unistr_t *text) { AV *data; SV *sv, **pp, *func = NULL; REGEXP *rx = NULL; size_t count, i, j; gcstring_t *gcstr, *ret; if (dataref == NULL || (data = (AV *)SvRV((SV *)dataref)) == NULL) return (lbobj->errnum = EINVAL), NULL; /* Pass I */ if (text != NULL) { if ((pp = av_fetch(data, 0, 0)) == NULL) return (lbobj->errnum = EINVAL), NULL; #if ((PERL_VERSION >= 10) || (PERL_VERSION >= 9 && PERL_SUBVERSION >= 5)) if (SvRXOK(*pp)) rx = SvRX(*pp); #else /* PERL_VERSION */ if (SvROK(*pp) && SvMAGICAL(sv = SvRV(*pp))) { MAGIC *mg; if ((mg = mg_find(sv, PERL_MAGIC_qr)) != NULL) rx = (REGEXP *)mg->mg_obj; } #endif /* PERL_VERSION */ if (rx == NULL) return (lbobj->errnum = EINVAL), NULL; do_pregexec_once(rx, str); return NULL; } /* Pass II */ if ((pp = av_fetch(data, 1, 0)) == NULL) func = NULL; else if (SvOK(*pp)) func = *pp; else func = NULL; if (func == NULL) { if ((ret = gcstring_newcopy(str, lbobj)) == NULL) return (lbobj->errnum = errno ? errno : ENOMEM), NULL; } else { dSP; ENTER; SAVETMPS; PUSHMARK(SP); linebreak_incref(lbobj); /* mortal but should not be destroyed.*/ XPUSHs(sv_2mortal(CtoPerl("Unicode::LineBreak", lbobj))); XPUSHs(sv_2mortal(unistrtoSV(str, 0, str->len))); PUTBACK; count = call_sv(func, G_ARRAY | G_EVAL); SPAGAIN; if (SvTRUE(ERRSV)) { if (!lbobj->errnum) lbobj->errnum = LINEBREAK_EEXTN; return NULL; } if ((ret = gcstring_new(NULL, lbobj)) == NULL) return (lbobj->errnum = errno ? errno : ENOMEM), NULL; for (i = 0; i < count; i++) { sv = POPs; if (!SvOK(sv)) continue; gcstr = SVtogcstring(sv, lbobj); for (j = 0; j < gcstr->gclen; j++) { if (gcstr->gcstr[j].flag & (LINEBREAK_FLAG_ALLOW_BEFORE | LINEBREAK_FLAG_PROHIBIT_BEFORE)) continue; if (i < count - 1 && j == 0) gcstr->gcstr[j].flag |= LINEBREAK_FLAG_ALLOW_BEFORE; else if (0 < j) gcstr->gcstr[j].flag |= LINEBREAK_FLAG_PROHIBIT_BEFORE; } gcstring_replace(ret, 0, 0, gcstr); if (!sv_isobject(sv)) gcstring_destroy(gcstr); } PUTBACK; FREETMPS; LEAVE; } return ret; } /* * Call format function */ static char *linebreak_states[] = { NULL, "sot", "sop", "sol", "", "eol", "eop", "eot", NULL }; static gcstring_t *format_func(linebreak_t *lbobj, linebreak_state_t action, gcstring_t *str) { SV *sv; char *actionstr; int count; gcstring_t *ret; dSP; if (action <= LINEBREAK_STATE_NONE || LINEBREAK_STATE_MAX <= action) return NULL; actionstr = linebreak_states[(size_t)action]; ENTER; SAVETMPS; PUSHMARK(SP); linebreak_incref(lbobj); /* mortal but should not be destroyed. */ XPUSHs(sv_2mortal(CtoPerl("Unicode::LineBreak", lbobj))); XPUSHs(sv_2mortal(newSVpv(actionstr, 0))); XPUSHs(sv_2mortal(CtoPerl("Unicode::GCString", gcstring_copy(str)))); PUTBACK; count = call_sv(lbobj->format_data, G_SCALAR | G_EVAL); SPAGAIN; if (SvTRUE(ERRSV)) { if (!lbobj->errnum) lbobj->errnum = LINEBREAK_EEXTN; POPs; return NULL; } else if (count != 1) croak("format_func: internal error"); else sv = POPs; if (!SvOK(sv)) ret = NULL; else ret = SVtogcstring(sv, lbobj); if (sv_isobject(sv)) ret = gcstring_copy(ret); PUTBACK; FREETMPS; LEAVE; return ret; } /* * Call sizing function */ static double sizing_func(linebreak_t *lbobj, double len, gcstring_t *pre, gcstring_t *spc, gcstring_t *str) { int count; double ret; dSP; ENTER; SAVETMPS; PUSHMARK(SP); linebreak_incref(lbobj); /* mortal but should not be destroyed. */ XPUSHs(sv_2mortal(CtoPerl("Unicode::LineBreak", lbobj))); XPUSHs(sv_2mortal(newSVnv(len))); XPUSHs(sv_2mortal(CtoPerl("Unicode::GCString", gcstring_copy(pre)))); XPUSHs(sv_2mortal(CtoPerl("Unicode::GCString", gcstring_copy(spc)))); XPUSHs(sv_2mortal(CtoPerl("Unicode::GCString", gcstring_copy(str)))); PUTBACK; count = call_sv(lbobj->sizing_data, G_SCALAR | G_EVAL); SPAGAIN; if (SvTRUE(ERRSV)) { if (!lbobj->errnum) lbobj->errnum = LINEBREAK_EEXTN; POPs; return -1; } else if (count != 1) croak("sizing_func: internal error"); else ret = POPn; PUTBACK; FREETMPS; LEAVE; return ret; } /* * Call urgent breaking function */ static gcstring_t *urgent_func(linebreak_t *lbobj, gcstring_t *str) { SV *sv; int count; size_t i; gcstring_t *gcstr, *ret; dSP; ENTER; SAVETMPS; PUSHMARK(SP); linebreak_incref(lbobj); /* mortal but should not be destroyed. */ XPUSHs(sv_2mortal(CtoPerl("Unicode::LineBreak", lbobj))); XPUSHs(sv_2mortal(CtoPerl("Unicode::GCString", gcstring_copy(str)))); PUTBACK; count = call_sv(lbobj->urgent_data, G_ARRAY | G_EVAL); SPAGAIN; if (SvTRUE(ERRSV)) { if (!lbobj->errnum) lbobj->errnum = LINEBREAK_EEXTN; return NULL; } if (count == 0) return NULL; ret = gcstring_new(NULL, lbobj); for (i = count; i; i--) { sv = POPs; if (SvOK(sv)) { gcstr = SVtogcstring(sv, lbobj); if (gcstr->gclen) gcstr->gcstr[0].flag = LINEBREAK_FLAG_ALLOW_BEFORE; gcstring_replace(ret, 0, 0, gcstr); if (!sv_isobject(sv)) gcstring_destroy(gcstr); } } PUTBACK; FREETMPS; LEAVE; return ret; } MODULE = Unicode::LineBreak PACKAGE = Unicode::LineBreak void EAWidths() INIT: char **p; PPCODE: for (p = (char **)linebreak_propvals_EA; *p != NULL; p++) XPUSHs(sv_2mortal(newSVpv(*p, 0))); void LBClasses() INIT: char **p; PPCODE: for (p = (char **)linebreak_propvals_LB; *p != NULL; p++) XPUSHs(sv_2mortal(newSVpv(*p, 0))); linebreak_t * _new(klass) char *klass; PROTOTYPE: $ CODE: if ((RETVAL = linebreak_new(ref_func)) == NULL) croak("%s->_new: %s", klass, strerror(errno)); linebreak_set_stash(RETVAL, newRV_noinc((SV *)newHV())); SvREFCNT_dec(RETVAL->stash); /* fixup */ OUTPUT: RETVAL linebreak_t * copy(self) linebreak_t *self; PROTOTYPE: $ CODE: RETVAL = linebreak_copy(self); OUTPUT: RETVAL void DESTROY(self) linebreak_t *self; PROTOTYPE: $ CODE: linebreak_destroy(self); SV * _config(self, ...) linebreak_t *self; PREINIT: size_t i; char *key; void *func; SV *val; char *opt; CODE: RETVAL = NULL; if (items < 2) croak("_config: Too few arguments"); else if (items < 3) { key = (char *)SvPV_nolen(ST(1)); if (strcasecmp(key, "BreakIndent") == 0) RETVAL = newSVuv(self->options & LINEBREAK_OPTION_BREAK_INDENT); else if (strcasecmp(key, "CharMax") == 0) RETVAL = newSVuv(self->charmax); else if (strcasecmp(key, "ColMax") == 0) RETVAL = newSVnv((NV)self->colmax); else if (strcasecmp(key, "ColMin") == 0) RETVAL = newSVnv((NV)self->colmin); else if (strcasecmp(key, "ComplexBreaking") == 0) RETVAL = newSVuv(self->options & LINEBREAK_OPTION_COMPLEX_BREAKING); else if (strcasecmp(key, "Context") == 0) { if (self->options & LINEBREAK_OPTION_EASTASIAN_CONTEXT) RETVAL = newSVpvn("EASTASIAN", 9); else RETVAL = newSVpvn("NONEASTASIAN", 12); } else if (strcasecmp(key, "EAWidth") == 0) { AV *av, *codes = NULL, *ret = NULL; propval_t p = PROP_UNKNOWN; unichar_t c; size_t i; if (self->map == NULL || self->mapsiz == 0) XSRETURN_UNDEF; for (i = 0; i < self->mapsiz; i++) if (self->map[i].eaw != PROP_UNKNOWN) { if (p != self->map[i].eaw){ p = self->map[i].eaw; codes = newAV(); av = newAV(); av_push(av, newRV_noinc((SV *)codes)); av_push(av, newSViv((IV)p)); if (ret == NULL) ret = newAV(); av_push(ret, newRV_noinc((SV *)av)); } for (c = self->map[i].beg; c <= self->map[i].end; c++) av_push(codes, newSVuv(c)); } if (ret == NULL) XSRETURN_UNDEF; RETVAL = newRV_noinc((SV *)ret); } else if (strcasecmp(key, "Format") == 0) { func = self->format_func; if (func == NULL) XSRETURN_UNDEF; else if (func == linebreak_format_NEWLINE) RETVAL = newSVpvn("NEWLINE", 7); else if (func == linebreak_format_SIMPLE) RETVAL = newSVpvn("SIMPLE", 6); else if (func == linebreak_format_TRIM) RETVAL = newSVpvn("TRIM", 4); else if (func == format_func) { if ((val = (SV *)self->format_data) == NULL) XSRETURN_UNDEF; ST(0) = val; /* should not be mortal. */ XSRETURN(1); } else croak("_config: internal error"); } else if (strcasecmp(key, "HangulAsAL") == 0) RETVAL = newSVuv(self->options & LINEBREAK_OPTION_HANGUL_AS_AL); else if (strcasecmp(key, "LBClass") == 0) { AV *av, *codes = NULL, *ret = NULL; propval_t p = PROP_UNKNOWN; unichar_t c; size_t i; if (self->map == NULL || self->mapsiz == 0) XSRETURN_UNDEF; for (i = 0; i < self->mapsiz; i++) if (self->map[i].lbc != PROP_UNKNOWN) { if (p != self->map[i].lbc){ p = self->map[i].lbc; codes = newAV(); av = newAV(); av_push(av, newRV_noinc((SV *)codes)); av_push(av, newSViv((IV)p)); if (ret == NULL) ret = newAV(); av_push(ret, newRV_noinc((SV *)av)); } for (c = self->map[i].beg; c <= self->map[i].end; c++) av_push(codes, newSVuv(c)); } if (ret == NULL) XSRETURN_UNDEF; RETVAL = newRV_noinc((SV *)ret); } else if (strcasecmp(key, "LegacyCM") == 0) RETVAL = newSVuv(self->options & LINEBREAK_OPTION_LEGACY_CM); else if (strcasecmp(key, "Newline") == 0) { unistr_t unistr = {self->newline.str, self->newline.len}; if (self->newline.str == NULL || self->newline.len == 0) RETVAL = unistrtoSV(&unistr, 0, 0); else RETVAL = unistrtoSV(&unistr, 0, self->newline.len); } else if (strcasecmp(key, "Prep") == 0) { AV *av; if (self->prep_func == NULL || self->prep_func[0] == NULL) XSRETURN_UNDEF; av = newAV(); for (i = 0; (func = self->prep_func[i]) != NULL; i++) if (func == linebreak_prep_URIBREAK) { if (self->prep_data == NULL || self->prep_data[i] == NULL) av_push(av, newSVpvn("NONBREAKURI", 11)); else av_push(av, newSVpvn("BREAKURI", 8)); } else if (func == prep_func) { if (self->prep_data == NULL || self->prep_data[i] == NULL) croak("_config: internal error"); SvREFCNT_inc(self->prep_data[i]); /* avoid freed */ av_push(av, self->prep_data[i]); } else croak("_config: internal error"); RETVAL = newRV_noinc((SV *)av); } else if (strcasecmp(key, "Sizing") == 0) { func = self->sizing_func; if (func == NULL) XSRETURN_UNDEF; else if (func == linebreak_sizing_UAX11) RETVAL = newSVpvn("UAX11", 5); else if (func == sizing_func) { if ((val = (SV *)self->sizing_data) == NULL) XSRETURN_UNDEF; ST(0) = val; /* should not be mortal. */ XSRETURN(1); } else croak("_config: internal error"); } else if (strcasecmp(key, "Urgent") == 0) { func = self->urgent_func; if (func == NULL) XSRETURN_UNDEF; else if (func == linebreak_urgent_ABORT) RETVAL = newSVpvn("CROAK", 5); else if (func == linebreak_urgent_FORCE) RETVAL = newSVpvn("FORCE", 5); else if (func == urgent_func) { if ((val = (SV *)self->urgent_data) == NULL) XSRETURN_UNDEF; ST(0) = val; /* should not be mortal. */ XSRETURN(1); } else croak("_config: internal error"); } else if (strcasecmp(key, "ViramaAsJoiner") == 0) RETVAL = newSVuv(self->options & LINEBREAK_OPTION_VIRAMA_AS_JOINER); else { warn("_config: Getting unknown option %s", key); XSRETURN_UNDEF; } } else if (!(items % 2)) croak("_config: Argument size mismatch"); else for (RETVAL = NULL, i = 1; i < items; i += 2) { if (!SvPOK(ST(i))) croak("_config: Illegal argument"); key = (char *)SvPV_nolen(ST(i)); val = ST(i + 1); if (strcasecmp(key, "Prep") == 0) { SV *sv, *pattern, *func; AV *av; REGEXP *rx = NULL; if (! SvOK(val)) linebreak_add_prep(self, NULL, NULL); else if (SvROK(val) && SvTYPE(av = (AV *)SvRV(val)) == SVt_PVAV && 0 < av_len(av) + 1) { pattern = *av_fetch(av, 0, 0); #if ((PERL_VERSION >= 10) || (PERL_VERSION >= 9 && PERL_SUBVERSION >= 5)) if (SvRXOK(pattern)) rx = SvRX(pattern); #else /* PERL_VERSION */ if (SvROK(pattern) && SvMAGICAL(sv = SvRV(pattern))) { MAGIC *mg; if ((mg = mg_find(sv, PERL_MAGIC_qr)) != NULL) rx = (REGEXP *)mg->mg_obj; } #endif if (rx != NULL) SvREFCNT_inc(pattern); /* FIXME:avoid freed */ else if (SvOK(pattern)) { #if ((PERL_VERSION >= 10) || (PERL_VERSION == 9 && PERL_SUBVERSION >= 5)) rx = pregcomp(pattern, 0); #else /* PERL_VERSION */ { PMOP *pm; New(1, pm, 1, PMOP); rx = pregcomp(SvPVX(pattern), SvEND(pattern), pm); } #endif if (rx != NULL) { #if PERL_VERSION >= 11 pattern = newRV_noinc((SV *)rx); sv_bless(pattern, gv_stashpv("Regexp", 0)); #else /* PERL_VERSION */ sv = newSV(0); sv_magic(sv, (SV *)rx, PERL_MAGIC_qr, NULL, 0); pattern = newRV_noinc(sv); sv_bless(pattern, gv_stashpv("Regexp", 0)); #endif } } else rx = NULL; if (rx == NULL) croak("_config: Not a regex"); if (av_fetch(av, 1, 0) == NULL) func = NULL; else if (SvOK(func = *av_fetch(av, 1, 0))) SvREFCNT_inc(func); /* avoid freed */ else func = NULL; av = newAV(); av_push(av, pattern); if (func != NULL) av_push(av, func); sv = newRV_noinc((SV *)av); linebreak_add_prep(self, prep_func, (void *)sv); SvREFCNT_dec(sv); /* fixup */ } else { char *s = SvPV_nolen(val); if (strcasecmp(s, "BREAKURI") == 0) linebreak_add_prep(self, linebreak_prep_URIBREAK, val); else if (strcasecmp(s, "NONBREAKURI") == 0) linebreak_add_prep(self, linebreak_prep_URIBREAK, NULL); else croak("_config: Unknown preprocess option: %s", s); } } else if (strcasecmp(key, "Format") == 0) { if (! SvOK(val)) linebreak_set_format(self, NULL, NULL); else if (sv_derived_from(val, "CODE")) linebreak_set_format(self, format_func, (void *)val); else { char *s = SvPV_nolen(val); if (strcasecmp(s, "DEFAULT") == 0) { warn("_config: " "Method name \"DEFAULT\" for Format option was " "obsoleted. Use \"SIMPLE\""); linebreak_set_format(self, linebreak_format_SIMPLE, NULL); } else if (strcasecmp(s, "SIMPLE") == 0) linebreak_set_format(self, linebreak_format_SIMPLE, NULL); else if (strcasecmp(s, "NEWLINE") == 0) linebreak_set_format(self, linebreak_format_NEWLINE, NULL); else if (strcasecmp(s, "TRIM") == 0) linebreak_set_format(self, linebreak_format_TRIM, NULL); else croak("_config: Unknown Format option: %s", s); } } else if (strcasecmp(key, "Sizing") == 0) { if (! SvOK(val)) linebreak_set_sizing(self, NULL, NULL); else if (sv_derived_from(val, "CODE")) linebreak_set_sizing(self, sizing_func, (void *)val); else { char *s = SvPV_nolen(val); if (strcasecmp(s, "DEFAULT") == 0) { warn("_config: " "Method name \"DEFAULT\" for Sizing option " "was obsoleted. Use \"UAX11\""); linebreak_set_sizing(self, linebreak_sizing_UAX11, NULL); } else if (strcasecmp(s, "UAX11") == 0) linebreak_set_sizing(self, linebreak_sizing_UAX11, NULL); else croak("_config: Unknown Sizing option: %s", s); } } else if (strcasecmp(key, "Urgent") == 0) { if (! SvOK(val)) linebreak_set_urgent(self, NULL, NULL); else if (sv_derived_from(val, "CODE")) linebreak_set_urgent(self, urgent_func, (void *)val); else { char *s = SvPV_nolen(val); if (strcasecmp(s, "NONBREAK") == 0) { warn("_config: " "Method name \"NONBREAK\" for Urgent " "option was obsoleted. Use undef"); linebreak_set_urgent(self, NULL, NULL); } else if (strcasecmp(s, "CROAK") == 0) linebreak_set_urgent(self, linebreak_urgent_ABORT, NULL); else if (strcasecmp(s, "FORCE") == 0) linebreak_set_urgent(self, linebreak_urgent_FORCE, NULL); else croak("_config: Unknown Urgent option: %s", s); } } else if (strcasecmp(key, "BreakIndent") == 0) { if (SVtoboolean(val)) self->options |= LINEBREAK_OPTION_BREAK_INDENT; else self->options &= ~LINEBREAK_OPTION_BREAK_INDENT; } else if (strcasecmp(key, "CharMax") == 0) self->charmax = SvUV(val); else if (strcasecmp(key, "ColMax") == 0) self->colmax = (double)SvNV(val); else if (strcasecmp(key, "ColMin") == 0) self->colmin = (double)SvNV(val); else if (strcasecmp(key, "ComplexBreaking") == 0) { if (SVtoboolean(val)) self->options |= LINEBREAK_OPTION_COMPLEX_BREAKING; else self->options &= ~LINEBREAK_OPTION_COMPLEX_BREAKING; } else if (strcasecmp(key, "Context") == 0) { if (SvOK(val)) opt = (char *)SvPV_nolen(val); else opt = NULL; if (opt && strcasecmp(opt, "EASTASIAN") == 0) self->options |= LINEBREAK_OPTION_EASTASIAN_CONTEXT; else self->options &= ~LINEBREAK_OPTION_EASTASIAN_CONTEXT; } else if (strcasecmp(key, "EAWidth") == 0) { AV *av, *codes; SV *sv; propval_t p; size_t i; if (! SvOK(val)) linebreak_clear_eawidth(self); else if (SvROK(val) && SvTYPE(av = (AV *)SvRV(val)) == SVt_PVAV && av_len(av) + 1 == 2 && av_fetch(av, 0, 0) != NULL && av_fetch(av, 1, 0) != NULL) { sv = *av_fetch(av, 1, 0); if (SvIOK(sv)) p = (propval_t) SvIV(sv); else croak("_config: Invalid argument"); sv = *av_fetch(av, 0, 0); if (SvROK(sv) && SvTYPE(codes = (AV *)SvRV(sv)) == SVt_PVAV) { for (i = 0; i < av_len(codes) + 1; i++) { if (av_fetch(codes, i, 0) == NULL) continue; if (! SvIOK(sv = *av_fetch(codes, i, 0))) croak("_config: Invalid argument"); linebreak_update_eawidth(self, (unichar_t) SvUV(sv), p); } } else if (SvIOK(sv)) { linebreak_update_eawidth(self, (unichar_t) SvUV(sv), p); } else croak("_config: Invalid argument"); } else croak("_config: Invalid argument"); } else if (strcasecmp(key, "HangulAsAL") == 0) { if (SVtoboolean(val)) self->options |= LINEBREAK_OPTION_HANGUL_AS_AL; else self->options &= ~LINEBREAK_OPTION_HANGUL_AS_AL; } else if (strcasecmp(key, "LBClass") == 0) { AV *av, *codes; SV *sv; propval_t p; size_t i; if (! SvOK(val)) linebreak_clear_lbclass(self); else if (SvROK(val) && SvTYPE(av = (AV *)SvRV(val)) == SVt_PVAV && av_len(av) + 1 == 2 && av_fetch(av, 0, 0) != NULL && av_fetch(av, 1, 0) != NULL) { sv = *av_fetch(av, 1, 0); if (SvIOK(sv)) p = (propval_t) SvIV(sv); else croak("_config: Invalid argument"); sv = *av_fetch(av, 0, 0); if (SvROK(sv) && SvTYPE(codes = (AV *)SvRV(sv)) == SVt_PVAV) { for (i = 0; i < av_len(codes) + 1; i++) { if (av_fetch(codes, i, 0) == NULL) continue; if (! SvIOK(sv = *av_fetch(codes, i, 0))) croak("_config: Invalid argument"); linebreak_update_lbclass(self, (unichar_t) SvUV(sv), p); } } else if (SvIOK(sv)) { linebreak_update_lbclass(self, (unichar_t) SvUV(sv), p); } else croak("_config: Invalid argument"); } else croak("_config: Invalid argument"); } else if (strcasecmp(key, "LegacyCM") == 0) { if (SVtoboolean(val)) self->options |= LINEBREAK_OPTION_LEGACY_CM; else self->options &= ~LINEBREAK_OPTION_LEGACY_CM; } else if (strcasecmp(key, "Newline") == 0) { if (!sv_isobject(val)) { unistr_t unistr = {NULL, 0}; SVtounistr(&unistr, val); linebreak_set_newline(self, &unistr); free(unistr.str); } else if (sv_derived_from(val, "Unicode::GCString")) linebreak_set_newline(self, (unistr_t *)PerltoC(gcstring_t *, val)); else croak("_config: Unknown object %s", HvNAME(SvSTASH(SvRV(val)))); } else if (strcasecmp(key, "ViramaAsJoiner") == 0) { if (SVtoboolean(val)) self->options |= LINEBREAK_OPTION_VIRAMA_AS_JOINER; else self->options &= ~LINEBREAK_OPTION_VIRAMA_AS_JOINER; } else warn("_config: Setting unknown option %s", key); } OUTPUT: RETVAL void as_hashref(self, ...) linebreak_t *self; CODE: if (self->stash == NULL) XSRETURN_UNDEF; ST(0) = self->stash; /* should not be mortal */ XSRETURN(1); SV* as_scalarref(self, ...) linebreak_t *self; PREINIT: char buf[64]; CODE: buf[0] = '\0'; snprintf(buf, 64, "%s(0x%lx)", HvNAME(SvSTASH(SvRV(ST(0)))), (unsigned long)(void *)self); RETVAL = newRV_noinc(newSVpv(buf, 0)); OUTPUT: RETVAL SV * as_string(self, ...) linebreak_t *self; PREINIT: char buf[64]; CODE: buf[0] = '\0'; snprintf(buf, 64, "%s(0x%lx)", HvNAME(SvSTASH(SvRV(ST(0)))), (unsigned long)(void *)self); RETVAL = newSVpv(buf, 0); OUTPUT: RETVAL propval_t lbrule(self, b_idx, a_idx) linebreak_t *self; propval_t b_idx; propval_t a_idx; PROTOTYPE: $$$ CODE: warn("lbrule() is obsoleted. Use breakingRule()"); if (!SvOK(ST(1)) || !SvOK(ST(2))) XSRETURN_UNDEF; if (self == NULL) XSRETURN_UNDEF; RETVAL = linebreak_get_lbrule(self, b_idx, a_idx); if (RETVAL == PROP_UNKNOWN) XSRETURN_UNDEF; OUTPUT: RETVAL propval_t breakingRule(lbobj, bgcstr, agcstr) linebreak_t *lbobj; generic_string bgcstr; generic_string agcstr; PROTOTYPE: $$$ PREINIT: propval_t blbc, albc; CODE: if (!SvOK(ST(1)) || !SvOK(ST(2))) XSRETURN_UNDEF; if (lbobj == NULL) XSRETURN_UNDEF; if ((blbc = gcstring_lbclass_ext(bgcstr, -1)) == PROP_UNKNOWN) XSRETURN_UNDEF; if ((albc = gcstring_lbclass(agcstr, 0)) == PROP_UNKNOWN) XSRETURN_UNDEF; RETVAL = linebreak_get_lbrule(lbobj, blbc, albc); if (RETVAL == PROP_UNKNOWN) XSRETURN_UNDEF; OUTPUT: RETVAL void reset(self) linebreak_t *self; PROTOTYPE: $ CODE: linebreak_reset(self); double strsize(lbobj, len, pre, spc, str, ...) linebreak_t *lbobj; double len; SV *pre; generic_string spc; generic_string str; PROTOTYPE: $$$$$;$ CODE: warn("strsize() is obsoleted. Use Unicode::GCString::columns"); if (5 < items) warn("``max'' argument of strsize was obsoleted"); RETVAL = linebreak_sizing_UAX11(lbobj, len, NULL, spc, str); if (RETVAL == -1.0) croak("strsize: %s", strerror(lbobj->errnum)); OUTPUT: RETVAL void break(self, input) linebreak_t *self; unistr_t *input; PROTOTYPE: $$ PREINIT: gcstring_t **ret, *r; size_t i; PPCODE: if (input == NULL) XSRETURN_UNDEF; ret = linebreak_break(self, input); if (ret == NULL) { if (self->errnum == LINEBREAK_EEXTN) croak("%s", SvPV_nolen(ERRSV)); else if (self->errnum == LINEBREAK_ELONG) croak("%s", "Excessive line was found"); else if (self->errnum) croak("%s", strerror(self->errnum)); else croak("%s", "Unknown error"); } switch (GIMME_V) { case G_SCALAR: r = gcstring_new(NULL, self); for (i = 0; ret[i] != NULL; i++) gcstring_append(r, ret[i]); linebreak_free_result(ret, 1); XPUSHs(sv_2mortal(unistrtoSV((unistr_t *)r, 0, r->len))); gcstring_destroy(r); XSRETURN(1); case G_ARRAY: for (i = 0; ret[i] != NULL; i++) XPUSHs(sv_2mortal(CtoPerl("Unicode::GCString", ret[i]))); linebreak_free_result(ret, 0); XSRETURN(i); default: linebreak_free_result(ret, 1); XSRETURN_EMPTY; } void break_partial(self, input) linebreak_t *self; unistr_t *input; PROTOTYPE: $$ PREINIT: gcstring_t **ret, *r; size_t i; PPCODE: ret = linebreak_break_partial(self, input); if (ret == NULL) { if (self->errnum == LINEBREAK_EEXTN) croak("%s", SvPV_nolen(ERRSV)); else if (self->errnum == LINEBREAK_ELONG) croak("%s", "Excessive line was found"); else if (self->errnum) croak("%s", strerror(self->errnum)); else croak("%s", "Unknown error"); } switch (GIMME_V) { case G_SCALAR: r = gcstring_new(NULL, self); for (i = 0; ret[i] != NULL; i++) gcstring_append(r, ret[i]); linebreak_free_result(ret, 1); XPUSHs(sv_2mortal(unistrtoSV((unistr_t *)r, 0, r->len))); gcstring_destroy(r); XSRETURN(1); case G_ARRAY: for (i = 0; ret[i] != NULL; i++) XPUSHs(sv_2mortal(CtoPerl("Unicode::GCString", ret[i]))); linebreak_free_result(ret, 0); XSRETURN(i); default: linebreak_free_result(ret, 1); XSRETURN_EMPTY; } const char * UNICODE_VERSION() CODE: RETVAL = linebreak_unicode_version; OUTPUT: RETVAL const char * SOMBOK_VERSION() CODE: RETVAL = SOMBOK_VERSION; OUTPUT: RETVAL MODULE = Unicode::LineBreak PACKAGE = Unicode::LineBreak::SouthEastAsian const char * supported() PROTOTYPE: CODE: RETVAL = linebreak_southeastasian_supported; if (RETVAL == NULL) XSRETURN_UNDEF; OUTPUT: RETVAL MODULE = Unicode::LineBreak PACKAGE = Unicode::GCString gcstring_t * _new(klass, str, lbobj=NULL) char *klass; unistr_t *str; linebreak_t *lbobj; PROTOTYPE: $$;$ CODE: if (str == NULL) XSRETURN_UNDEF; /* FIXME:buffer is copied twice. */ if ((RETVAL = gcstring_newcopy(str, lbobj)) == NULL) croak("%s->_new: %s", klass, strerror(errno)); OUTPUT: RETVAL void DESTROY(self) gcstring_t *self; PROTOTYPE: $ CODE: gcstring_destroy(self); void as_array(self) gcstring_t *self; PROTOTYPE: $ PREINIT: size_t i; PPCODE: if (self != NULL) for (i = 0; i < self->gclen; i++) XPUSHs(sv_2mortal( CtoPerl("Unicode::GCString", gcstring_substr(self, i, 1)))); SV* as_scalarref(self, ...) gcstring_t *self; PREINIT: char buf[64]; CODE: buf[0] = '\0'; snprintf(buf, 64, "%s(0x%lx)", HvNAME(SvSTASH(SvRV(ST(0)))), (unsigned long)(void *)self); RETVAL = newRV_noinc(newSVpv(buf, 0)); OUTPUT: RETVAL SV * as_string(self, ...) gcstring_t *self; PROTOTYPE: $;$;$ CODE: RETVAL = unistrtoSV((unistr_t *)self, 0, self->len); OUTPUT: RETVAL size_t chars(self) gcstring_t *self; PROTOTYPE: $ CODE: RETVAL = self->len; OUTPUT: RETVAL #define lbobj self->lbobj int cmp(self, str, swap=FALSE) gcstring_t *self; generic_string str; swapspec_t swap; PROTOTYPE: $$;$ CODE: if (swap == TRUE) RETVAL = gcstring_cmp(str, self); else RETVAL = gcstring_cmp(self, str); OUTPUT: RETVAL size_t columns(self) gcstring_t *self; CODE: RETVAL = gcstring_columns(self); OUTPUT: RETVAL #define lbobj self->lbobj gcstring_t * concat(self, str, swap=FALSE) gcstring_t *self; generic_string str; swapspec_t swap; PROTOTYPE: $$;$ CODE: if (swap == TRUE) RETVAL = gcstring_concat(str, self); else if (swap == -1) { gcstring_append(self, str); XSRETURN(1); } else RETVAL = gcstring_concat(self, str); OUTPUT: RETVAL gcstring_t * copy(self) gcstring_t *self; PROTOTYPE: $ CODE: RETVAL = gcstring_copy(self); OUTPUT: RETVAL int eos(self) gcstring_t *self; CODE: RETVAL = gcstring_eos(self); OUTPUT: RETVAL unsigned int flag(self, ...) gcstring_t *self; PROTOTYPE: $;$;$ PREINIT: int i; unsigned int flag; CODE: warn("flag() will be deprecated in near future"); if (1 < items) i = SvIV(ST(1)); else i = self->pos; if (i < 0 || self == NULL || self->gclen <= i) XSRETURN_UNDEF; if (2 < items) { flag = SvUV(ST(2)); if (flag == (flag & 255)) self->gcstr[i].flag = (unsigned char)flag; else warn("flag: unknown flag(s)"); } RETVAL = (unsigned int)self->gcstr[i].flag; OUTPUT: RETVAL gcstring_t * item(self, ...) gcstring_t *self; PROTOTYPE: $;$ PREINIT: int i; CODE: if (1 < items) i = SvIV(ST(1)); else i = self->pos; if (i < 0 || self == NULL || self->gclen <= i) XSRETURN_UNDEF; RETVAL = gcstring_substr(self, i, 1); OUTPUT: RETVAL gcstring_t * join(self, ...) gcstring_t *self; PREINIT: size_t i; gcstring_t *str; CODE: switch (items) { case 0: croak("join: Too few arguments"); case 1: RETVAL = gcstring_new(NULL, self->lbobj); break; case 2: RETVAL = SVtogcstring(ST(1), self->lbobj); if (sv_isobject(ST(1))) RETVAL = gcstring_copy(RETVAL); break; default: RETVAL = SVtogcstring(ST(1), self->lbobj); if (sv_isobject(ST(1))) RETVAL = gcstring_copy(RETVAL); for (i = 2; i < items; i++) { gcstring_append(RETVAL, self); str = SVtogcstring(ST(i), self->lbobj); gcstring_append(RETVAL, str); if (!sv_isobject(ST(i))) gcstring_destroy(str); } break; } OUTPUT: RETVAL propval_t lbc(self) gcstring_t *self; PROTOTYPE: $ CODE: if ((RETVAL = gcstring_lbclass(self, 0)) == PROP_UNKNOWN) XSRETURN_UNDEF; OUTPUT: RETVAL propval_t lbcext(self) gcstring_t *self; PROTOTYPE: $ CODE: if ((RETVAL = gcstring_lbclass_ext(self, -1)) == PROP_UNKNOWN) XSRETURN_UNDEF; OUTPUT: RETVAL propval_t lbclass(self, ...) gcstring_t *self; PROTOTYPE: $;$ PREINIT: int i; CODE: warn("lbclass() is obsoleted. Use lbc()"); if (1 < items) i = SvIV(ST(1)); else i = self->pos; RETVAL = gcstring_lbclass(self, i); if (RETVAL == PROP_UNKNOWN) XSRETURN_UNDEF; OUTPUT: RETVAL propval_t lbclass_ext(self, ...) gcstring_t *self; PROTOTYPE: $;$ PREINIT: int i; CODE: warn("lbclass_ext() is obsoleted. Use lbcext()"); if (1 < items) i = SvIV(ST(1)); else i = self->pos; RETVAL = gcstring_lbclass_ext(self, i); if (RETVAL == PROP_UNKNOWN) XSRETURN_UNDEF; OUTPUT: RETVAL size_t length(self) gcstring_t *self; PROTOTYPE: $ CODE: RETVAL = self->gclen; OUTPUT: RETVAL gcstring_t * next(self, ...) gcstring_t *self; PROTOTYPE: $;$;$ PREINIT: gcchar_t *gc; CODE: if (gcstring_eos(self)) XSRETURN_UNDEF; gc = gcstring_next(self); RETVAL = gcstring_substr(self, gc - self->gcstr, 1); OUTPUT: RETVAL size_t pos(self, ...) gcstring_t *self; PROTOTYPE: $;$ CODE: if (1 < items) gcstring_setpos(self, SvIV(ST(1))); RETVAL = self->pos; OUTPUT: RETVAL #define lbobj self->lbobj gcstring_t * substr(self, offset, length=self->gclen, replacement=NULL) gcstring_t *self; int offset; int length; generic_string replacement; PROTOTYPE: $$;$;$ CODE: RETVAL = gcstring_substr(self, offset, length); if (replacement != NULL) if (gcstring_replace(self, offset, length, replacement) == NULL) croak("substr: %s", strerror(errno)); if (RETVAL == NULL) croak("substr: %s", strerror(errno)); OUTPUT: RETVAL Unicode-LineBreak-Unicode-LineBreak-2016.007_02/MANIFEST000066400000000000000000000053041273566223400220550ustar00rootroot00000000000000ARTISTIC Changes Changes.REL1 GPL lib/POD2/JA/Text/LineFold.pod lib/POD2/JA/Unicode/GCString.pod lib/POD2/JA/Unicode/LineBreak.pod lib/Text/LineFold.pm lib/Unicode/GCString.pm lib/Unicode/GCString.pod lib/Unicode/LineBreak.pm lib/Unicode/LineBreak.pod lib/Unicode/LineBreak/Constants.pm lib/Unicode/LineBreak/Defaults.pm.sample LineBreak.xs Makefile.PL Makefile.PL.sombok MANIFEST This list of files perl-Unicode-LineBreak.spec ppport.h README sombok/ARTISTIC sombok/ChangeLog sombok/ChangeLog.REL1 sombok/COPYING sombok/include/sombok_constants.h sombok/include/sombok.h.in sombok/lib/5.1.0.c sombok/lib/5.2.0.c sombok/lib/6.0.0.c sombok/lib/6.1.0.c sombok/lib/6.2.0.c sombok/lib/6.3.0.c sombok/lib/7.0.0.c sombok/lib/8.0.0.c sombok/lib/break.c sombok/lib/charprop.c sombok/lib/gcstring.c sombok/lib/linebreak.c sombok/lib/southeastasian.c sombok/lib/utf8.c sombok/lib/utils.c sombok/README sombok/README.ja_JP sombok/UNICODE sombok/VERSION t/000.t t/00GraphemeBreakTest.t t/00LineBreakTest.t t/01break.t t/02hangul.t t/03ns.t t/04fold.t t/05urgent.t t/06context.t t/07sea.t t/08partial.t t/09uri.t t/10gcstring.t t/11format.t t/12fold2.t t/13flowedsp.t t/14sea_al.t t/15array.t t/16regex.t t/17prop.t t/18currency.t t/lb.pl t/lf.pl t/pod.t test-data/amitagyong.in test-data/amitagyong.out test-data/ar.in test-data/ar.out test-data/ecclesiazusae.CharactersMax.out test-data/ecclesiazusae.ColumnsMax.out test-data/ecclesiazusae.ColumnsMin.out test-data/ecclesiazusae.in test-data/ecclesiazusae.out test-data/el.in test-data/el.out test-data/flowedsp.in test-data/flowedsp.out test-data/fr.ea.out test-data/fr.fixed.out test-data/fr.flowed.out test-data/fr.format.out test-data/fr.in test-data/fr.newline.out test-data/fr.out test-data/fr.plain.out test-data/fr.wrap.out test-data/GraphemeBreakTest.txt test-data/he.in test-data/he.out test-data/ja-a.in test-data/ja-a.out test-data/ja-k.in test-data/ja-k.ns.out test-data/ja-k.out test-data/ja.fixed.out test-data/ja.flowed.out test-data/ja.format.out test-data/ja.in test-data/ja.out test-data/ja.plain.out test-data/ja.wrap.out test-data/ko-decomp.in test-data/ko-decomp.out test-data/ko.al.out test-data/ko.in test-data/ko.newline.out test-data/ko.out test-data/LineBreakTest.txt test-data/quotes.fixed.out test-data/quotes.flowed.out test-data/quotes.in test-data/quotes.norm.in test-data/quotes.plain.out test-data/README.test-data test-data/ru.in test-data/ru.out test-data/sa.in test-data/sa.out test-data/th.al.out test-data/th.in test-data/th.out test-data/titin.bz test-data/uri.break.http.out test-data/uri.break.out test-data/uri.in test-data/uri.nonbreak.out test-data/vi-decomp.in test-data/vi-decomp.out test-data/vi.in test-data/vi.out test-data/zh.in test-data/zh.out Todo.REL1 typemap Unicode-LineBreak-Unicode-LineBreak-2016.007_02/Makefile.PL000066400000000000000000000155731273566223400227070ustar00rootroot00000000000000use 5.008; use strict; package MY; use ExtUtils::MakeMaker; use Config; my $csubdir = 'sombok'; my $sombok_atleast_version = '2.4.0'; my $sombok_max_version = '2.99.99'; my $pkg_config = $ENV{'PKG_CONFIG'} || 'pkg-config'; my $enable_libthai = 1; my $libthai_inc = ''; my $libthai_libs = ''; my $bundled_sombok = 0; my $unicode; my $unicode_num; my $copy_unidata = 0; my $sombok; my $libthai; sub clean { my $inherited = shift->SUPER::clean(@_); $inherited =~ s{(\n*)\z} {\n\t\$(RM_F) $csubdir\$(DIRFILESEP)Makefile.PL $csubdir\$(DIRFILESEP)include\$(DIRFILESEP)sombok.h $csubdir\$(DIRFILESEP)lib\$(DIRFILESEP)*\$(OBJ_EXT)$1}; $inherited; } sub test { my $inherited = shift->SUPER::test(@_); # Get UNIDATA before testing. $inherited =~ s/^(test\s*:+\s*)/${1}copy_unidata /m if $copy_unidata; $inherited; } sub distdir { my $inherited = shift->SUPER::distdir(@_); if ($copy_unidata) { open my $mani, '<', 'MANIFEST' or die "$!"; $inherited =~ s/^(distdir\s*:+\s*)/${1}copy_unidata /m if scalar grep m{^test-data/LineBreakTest.txt}, <$mani>; close $mani; } $inherited; } sub postamble { my $self = shift; my $cd_make = $self->cd($csubdir, '$(MAKE) all'); my $make_json = $self->cd(q{$(DISTVNAME)}, q{$(FULLPERL) -MCPAN::Meta -MCPAN::Meta::Converter -e 'CPAN::Meta->new(CPAN::Meta::Converter->new(CPAN::Meta->load_file("META.yml"))->convert(version=>"2"))->save("META.json")'} ); my $make_rpm_spec = $self->cd(q{$(DISTVNAME)}, q{$(FULLPERL) -i -pe 's/^(%define version\s+).*/$${1}$(VERSION)/; s/^(%define sombok_version\s+).*/$${1}} . $sombok_atleast_version . q{/' perl-Unicode-LineBreak.spec}); return <; $unicode =~ s/\s+$//; close $fp; } foreach my $arg (@ARGV) { if ($arg eq '--disable-libthai') { $enable_libthai = 0; } elsif ($arg eq '--with-bundled-sombok') { $bundled_sombok = 1; } elsif ($arg =~ /^--with-unicode-version=(\S+)$/) { $unicode = $1; } } $unicode_num = $unicode; $unicode_num =~ s/[^.\d].*//; my %opts = ('LIBS' => '', 'INC' => ''); $enable_libthai = 0 if system("$pkg_config --exists libthai") >> 8; if ($enable_libthai) { $libthai = `$pkg_config --modversion libthai`; chomp $libthai; my $ver; if ($libthai and $libthai =~ m{^(\d+)\.(\d+)(?:\.(\d+))?}) { $ver = $1 + $2 * 0.001 + ($3 || 0) * 0.000001; } unless ($ver and 0.001009 <= $ver) { printf "Version of your libthai is %s. 0.1.9 or later is required. Update it or specify --disable-libthai option.\n", ($libthai || $ver); exit 1; } print "Use libthai $libthai\n"; $libthai_libs = `$pkg_config --libs libthai`; chomp $libthai_libs; $libthai_inc = `$pkg_config --cflags libthai`; chomp $libthai_inc; } if (system("$pkg_config --atleast-version=$sombok_atleast_version sombok") >> 8 or system("$pkg_config --max-version=$sombok_max_version sombok") >> 8) { $bundled_sombok = 1; } if ($bundled_sombok) { if (open my $fp, '<', "$csubdir/VERSION") { $sombok = <$fp>; close $fp; $sombok =~ s/\s+$//; $sombok ||= 'bundled'; } print "Use bundled sombok $sombok with Unicode $unicode\n"; # create sombok/Makefile.PL unlink "$csubdir/Makefile" if -e "$csubdir/Makefile"; open my $fp, '<', 'Makefile.PL.sombok' or die $!; my $h = join '', <$fp>; close $fp; $h =~ s/\@LIBTHAI_INC\@/$libthai_inc/g; $h =~ s/\@UNICODE_VERSION\@/$unicode/g; open my $ofp, '>', "$csubdir/Makefile.PL" or die $!; print $ofp $h; close $ofp; # create sombok/include/sombok.h open my $fp, '<', "$csubdir/include/sombok.h.in" or die $!; my $h = join '', <$fp>; close $fp; $h =~ s/#ifdef HAVE_CONFIG_H/#if 1/; if ($enable_libthai) { $h =~ s{\"config.h\"} {\"EXTERN.h\"\n#include \"perl.h\"\n#include \"XSUB.h\"\n#define USE_LIBTHAI \"libthai/$libthai\"}; } else { $h =~ s{\"config.h\"} {\"EXTERN.h\"\n#include \"perl.h\"\n#include \"XSUB.h\"\n#undef USE_LIBTHAI}; } $h =~ s/\@SOMBOK_UNICHAR_T\@/U32/; $h =~ s/\@PACKAGE_VERSION\@/$sombok/; $h =~ s/\@SOMBOK_UNICHAR_T_IS_WCHAR_T\@//; $h =~ s/\@SOMBOK_UNICHAR_T_IS_UNSIGNED_INT\@//; $h =~ s/\@SOMBOK_UNICHAR_T_IS_UNSIGNED_LONG\@//; open my $ofp, '>', "$csubdir/include/sombok.h" or die $!; print $ofp $h; close $ofp; #XXXmy $define = ($^O eq 'MSWin32') ? '-DMSDOS' : ''; my $myextlib; if ($^O eq 'MSWin32') { $myextlib = 'sombok\\libsombok$(LIB_EXT)'; } elsif ($^O eq 'darwin' && $Config{'ldflags'} =~ /-arch ppc64/) { $myextlib = '-all_load sombok/libsombok$(LIB_EXT)'; } else { $myextlib = 'sombok/libsombok$(LIB_EXT)'; } $opts{'MYEXTLIB'} = $myextlib; $opts{'INC'} .= " -I$csubdir\$(DIRFILESEP)include"; $opts{'depend'} = {'$(OBJECT)' => '$(MYEXTLIB)'}; } else { $sombok = `$pkg_config --modversion sombok`; chomp $sombok; print "Use sombok $sombok\n"; $opts{'LIBS'} .= ' ' . `$pkg_config --libs sombok`; $opts{'INC'} .= ' ' . `$pkg_config --cflags sombok`; } if ($enable_libthai) { $opts{'LIBS'} .= ' ' . $libthai_libs; $opts{'INC'} .= ' ' . $libthai_inc; } $opts{'LIBS'} =~ s/\s+/ /g; $opts{'INC'} =~ s/\s+/ /g; $copy_unidata = (!-e "test-data/LineBreakTest.txt" || !-e "test-data/GraphemeBreakTest.txt"); sub CheckExternalDependencies { my @dependencies = @_; for my $program (@dependencies) { if (system("which $program > /dev/null") != 0) { warn "W: Required test dependency not found: $program\n"; } } } CheckExternalDependencies('wget'); WriteMakefile( 'NAME' => 'Unicode::LineBreak', 'ABSTRACT_FROM' => 'lib/Unicode/LineBreak.pod', 'VERSION_FROM' => 'lib/Unicode/LineBreak.pm', 'AUTHOR' => q{Hatuka*nezumi - IKEDA Soji }, 'LICENSE' => 'perl', 'MIN_PERL_VERSION' => 5.008, 'CONFIGURE_REQUIRES' => {'ExtUtils::MakeMaker' => '6.26',}, 'BUILD_REQUIRES' => { 'ExtUtils::MakeMaker' => '6.26', 'Test::More' => '0.45', }, 'PREREQ_PM' => { 'Encode' => 1.98, 'MIME::Charset' => '1.006.2', }, 'META_MERGE' => { 'resources' => { 'license' => 'http://dev.perl.org/licenses/', 'repository' => 'https://github.com/hatukanezumi/Unicode-LineBreak/', }, }, %opts, 'dist' => {'PREOP' => "\$(MAKE) preop",}, ); Unicode-LineBreak-Unicode-LineBreak-2016.007_02/Makefile.PL.sombok000066400000000000000000000033211273566223400241640ustar00rootroot00000000000000#-*- perl -*- #-*- coding: us-ascii -*- use ExtUtils::MakeMaker; use Cwd; my $define = ''; $define .= ' -DWIN32 -DPERL_STATIC_SYMS' if ($^O eq 'MSWin32'); my $pwd = cwd(); WriteMakefile( NAME => 'sombok', # (doesn't matter what the name is here) oh yes it does DEFINE => $define, INC => "\"-I\$(PERL_INC)\" \"-I$pwd\$(DIRFILESEP)include\" @LIBTHAI_INC@", # force PERL_INC dir ahead of system -I's SKIP => [qw(dynamic dynamic_lib dlsyms)], OBJECT => '$(O_FILES)', clean => {'FILES' => 'libsombok$(LIB_EXT)'}, H => [qw(include/sombok.h include/sombok_constants.h $(PERL_INC)/config.h)], C => [qw(lib/break.c lib/charprop.c lib/gcstring.c lib/linebreak.c lib/southeastasian.c lib/utf8.c lib/utils.c lib/@UNICODE_VERSION@.c)] ); sub MY::constants { package MY; my $self = shift; $self->{INST_STATIC} = 'libsombok$(LIB_EXT)'; return $self->SUPER::constants(); } sub MY::top_targets { my $r = ' all :: static $(NOECHO) $(NOOP) config :: $(NOECHO) $(NOOP) lint: lint -abchx $(LIBSRCS) pure_all :: $(NOECHO) $(NOOP) '; $r .= ' # This is a workaround, the problem is that our old GNU make exports # variables into the environment so $(MYEXTLIB) is set in here to this # value which can not be built. sombok/libsombok.a: $(NOECHO) $(NOOP) ' unless $^O eq 'VMS'; return $r; } sub MY::c_o { package MY; my $self = shift; my $inherited = $self->SUPER::c_o(@_); $inherited =~ s{(:\n\t)(.*(?:\n\t.*)*)} { $1 . $self->cd('lib', split /(?. $$ Unicode-LineBreak-Unicode-LineBreak-2016.007_02/Todo.REL1000066400000000000000000000033611273566223400222570ustar00rootroot000000000000002009-10-17 Hatuka*nezumi - IKEDA Soji * Text::Wrap compatible functions. - Unsatisfactorily DONE for wrap() by 1.005. 2009-05-29 Hatuka*nezumi - IKEDA Soji * Perform appropriate line breaking for South East Asian complex contexts (SA), at least on Thai and Khmer scripts. - For Thai, libthai package will be useful. - DONE by 1.000_01. - Some implementations can be found for Khmer. - Burmese, Lao, Tai Le, New Tai Lue, Tai Tham and Tai Viet - No plans. 2009-05-17 Hatuka*nezumi - IKEDA Soji * Full XS version for speed. - DONE by 1.004, except functions using regexp. 2009-05-11 Hatuka*nezumi - IKEDA Soji * Incremental input. - DONE by 0.005. 2009-05-02 Hatuka*nezumi - IKEDA Soji * Text::LineFold: obsoleted RFC 2646 folding and unfolding. - DONE by 1.007. 2009-05-02 Hatuka*nezumi - IKEDA Soji * ``Hunging punctuation'' (ぶら下げ組み; burasage-gumi) mainly for Japanese texts. - Punctuations may protrude into the right -- bottom, in vertical line -- margin. - It is usually applied to IDEOGRAPHIC FULL STOP and IDEOGRAPHIC COMMA. It is less frequently applied to FULL STOP and COMMA. - ``Half-''hunging, regarding ideographic punctuations as narrow only at end of line, might be either allowed or inhibited by typesetting rules, whether (full-)hunging was allowed or not. - I. FULL STOP is more often inhibited to be narrow than I. COMMA. - Wide (including fullwidth) parentheses may or may not ``half''-hung by rules, however, they shouldn't ``full''-hung anyway. Local Variables: mode: change-log change-log-default-name: "Todo" tab-width: 2 left-margin: 2 End: Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/000077500000000000000000000000001273566223400214705ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/POD2/000077500000000000000000000000001273566223400221745ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/POD2/JA/000077500000000000000000000000001273566223400224665ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/POD2/JA/Text/000077500000000000000000000000001273566223400234125ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/POD2/JA/Text/LineFold.pod000066400000000000000000000104611273566223400256140ustar00rootroot00000000000000use utf8; =encoding utf-8 =head1 NAME Text::LineFold~[ja] - プレインテキストの行折り =head1 SYNOPSIS use Text::LineFold; $lf = Text::LineFold->new(); # 行折りする $folded = $lf->fold($string, 'PLAIN'); $indented = $lf->fold(' ' x 8, ' ' x 4, $string); # 行折りを戻す $unfolded = $lf->unfold($string, 'FIXED'); =head1 DESCRIPTION Text::LineFold は、プレインテキストの行折りをしたり行折りを戻したりする。 電子メールメッセージを主眼に置いており、RFC 3676 の flowed 形式にも対応する。 =head2 公開インタフェース =over 4 =item new ([KEY => VALUE, ...]) I<コンストラクタ>。 KEY => VALUE の対については config メソッドを参照。 =item $self->config (KEY) =item $self->config ([KEY => VAL, ...]) I<インスタンスメソッド>。 設定を取得または更新する。以下の KEY => VALUE 対を指定できる。 =over 4 =item Charset => CHARSET 入力文字列を符号化しているキャラクタセット。 文字列または L オブジェクト。 初期値は C<"UTF-8">。 =item Language => LANGUAGE Charset オプションとともに、言語/地域の文脈を決めるのに使える。 初期値は C<"XX">。 L オプションも参照。 =item Newline => STRING 改行の文字列。 初期値は C<"\n">。 =item OutputCharset => CHARSET fold()/unfold() の結果を符号化するキャラクタセット。 文字列または L オブジェクト。 特殊値 C<"_UNICODE_"> を指定すると、結果は Unicode 文字列となる。 初期値は Charset オプションの値。 =item TabSize => NUMBER タブストップの桁数。 0 を指定すると、タブストップを無視する。 初期値は 8。 =item BreakIndent =item CharMax =item ColMax =item ColMin =item ComplexBreaking =item EAWidth =item HangulAsAL =item LBClass =item LegacyCM =item Prep =item Urgent L を参照。 =back =item $self->fold (STRING, [METHOD]) =item $self->fold (INITIAL_TAB, SUBSEQUENT_TAB, STRING, ...) I<インスタンスメソッド>。 文字列 STRING を行折りする。 行末の余分な空白文字や水平タブ文字を除去し、 改行の文字列を Newline オプションで指定したものに置き換え、 テキスト末尾に改行がなければ追加する。 水平タブ文字は TabSize オプションによる幅のタブストップと見なす。 ひとつめの形式では、METHOD 引数に以下のオプションを指定できる。 =over 4 =item C<"FIXED"> C<"E"> で始まる行は行折りしない。 段落は空行で分かたれる。 =item C<"FLOWED"> RFC 3676 で定義される C<"Format=Flowed; DelSp=Yes"> 形式。 =item C<"PLAIN"> 初期の方法。すべての行を行折りする。 =back ふたつめの形式は、L に似ている。 すべての行を行折りする。段落の先頭には INITIAL_TAB を、ほかの行の先頭には SUBSEQUENT_TAB を挿入する。 =item $self->unfold (STRING, METHOD) 文字列 STRING の行折りされた段落をつなぎ直してそれを返す。 METHOD 引数には以下のオプションを指定できる。 =over 4 =item C<"FIXED"> 初期の方法。 C<"E"> で始まる行はつなぎ直さない。 空行を段落の区切りとみなす。 =item C<"FLOWED"> RFC 3676 で定義される C<"Format=Flowed; DelSp=Yes"> 形式をつなぎ直す。 =item C<"FLOWEDSP"> RFC 3676 で定義される C<"Format=Flowed; DelSp=No"> 形式をつなぎ直す。 =begin comment =item C<"OBSFLOWED"> RFC 2646 (廃止) で定義される C<"Format=Flowed"> 形式をできるだけうまくつなぎ直す。 =end comment =back =back =head1 BUGS バグやバグのような動作は開発者に教えてください。 CPAN Request Tracker: L. =head1 VERSION $VERSION 変数を見てほしい。 =head1 SEE ALSO L, L. =head1 AUTHOR Copyright (C) 2009-2012 Hatuka*nezumi - IKEDA Soji . This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/POD2/JA/Unicode/000077500000000000000000000000001273566223400240545ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/POD2/JA/Unicode/GCString.pod000066400000000000000000000176571273566223400262600ustar00rootroot00000000000000=encoding utf-8 =head1 NAME Unicode::GCString~[ja] - UAX #29 書記素クラスタの列としての文字列 =head1 SYNOPSIS use Unicode::GCString; $gcstring = Unicode::GCString->new($string); =head1 DESCRIPTION Unicode::GCString はUnicode文字列を、Unicode標準附属書29 [UAX #29] で定義される「拡張書記素クラスタ」〔extended grapheme cluster〕の列として扱う。 B<書記素クラスタ>〔grapheme cluster〕は、Unicode文字の列で、ひとつのB<書記素基底>〔grapheme base〕と、付加的なB<書記素エキステンダ>〔grapheme extender〕および/またはB<「前置」文字>〔“prepend” character〕から成る。これは人が「文字」とみなすものに近い。 =head2 公開インタフェース =head3 コンストラクタ =over 4 =item new (STRING, [KEY => VALUE, ...]) =item new (STRING, [LINEBREAK]) I<コンストラクタ>。 Unicode文字列 STRING から新たに書記素クラスタ文字列 (Unicode::GCString オブジェクト) を作る。 KEY => VALUE の対については Lを参照。 第二の形式では、 L オブジェクト LINEBREAK で分節の仕様を決定する。 B<注>: 最初の形式はリリース 2012.10 で導入された。 =item copy I<コピーコンストラクタ>。 書記素クラスタ文字列の複製を作る。 新たな文字列では、次の位置は先頭になる。 =back =head3 長さ =over 4 =item chars I<インスタンスメソッド>。 書記素クラスタ文字列に含まれるUnicode文字の数、つまりUnicode文字列としての長さを返す。 =item columns I<インスタンスメソッド>。 組み込みの文字データベースで決定される書記素クラスタ文字列の桁数を返す。 詳しくは L を参照。 =item length I<インスタンスメソッド>。 書記素クラスタ文字列に含まれる書記素クラスタの数を返す。 =back =head3 文字列としての操作 =over 4 =item as_string =item C<">OBJECTC<"> I<インスタンスメソッド>。 書記素クラスタ文字列を明示的にUnicode文字列に変換する。 =item cmp (STRING) =item STRING C STRING I<インスタンスメソッド>。 文字列を比較する。特に風変わりなところはない。 文字列のどちらかがUnicode文字列でもよい。 =item concat (STRING) =item STRING C<.> STRING I<インスタンスメソッド>。 書記素クラスタ文字列を結合する。 STRING のどちらかがUnicode文字列でもよい。 結果の文字列の桁数 (columns() を参照) や書記素クラスタの数 (length() を参照) は、ふたつの文字列のそれの和になるとはかぎらないことに注意。 新たな文字列では、次の位置は左辺の文字列にセットされていた位置になる。 =item join ([STRING, ...]) I<インスタンスメソッド>。 STRING を、書記素クラスタ文字列をはさんでつなげる。 STRING のうちに Unicode文字列があってもよい。 =item substr (OFFSET, [LENGTH, [REPLACEMENT]]) I<インスタンスメソッド>。 書記素クラスタ文字列の部分文字列を返す。 OFFSET と LENGTH は書記素クラスタで数える。 REPLACEMENT を指定すると、部分文字列をそれで置き換える。 REPLACEMENT は Unicode文字列でもよい。 Note: このメソッドは組み込み関数 substr() と異なり、左辺値を返すことはない。 =back =head3 書記素クラスタの列としての操作 =over 4 =item as_array =item C<@{>OBJECTC<}> =item as_arrayref I<インスタンスメソッド>。 書記素クラスタ文字列を、書記素クラスタの情報の配列に変換する。 =item eos I<インスタンスメソッド>。 現在の位置が書記素クラスタ文字列の最後かどうか調べる。 =item item ([OFFSET]) I<インスタンスメソッド>。 OFFSET番めの書記素クラスタを返す。 OFFSET を指定しないと、次の位置の書記素クラスタの情報を返す。 =item next =item C>OBJECTC> I<インスタンスメソッド>、反復的。 次の位置の書記素クラスタを返し、次の位置をひとつ進める。 =item pos ([OFFSET]) I<インスタンスメソッド>。 OFFSET を指定した場合は、次の位置をそれにする。 書記素クラスタ文字列の次の位置を返す。 =back =begin comment =head4 廃止予定のメソッド =over 4 =item flag ([OFFSET, [VALUE]]) I<インスタンスメソッド>。 OFFSET番めの書記素クラスタのフラグ値を取得、設定する。 OFFSET を指定しないと、次の位置の書記素クラスタのフラグ値を返す。 フラグ値は 255 を超えない非負整数で、はじめは 0。 定義ずみのフラグには次のものがある。 =over 4 =item Unicode::LineBreak::ALLOW_BEFORE この書記素クラスタの直前で行分割を許す。 =item Unicode::LineBreak::PROHIBIT_BEFORE この書記素クラスタの直前での行分割を禁ずる。 =back =item lbclass ([OFFSET]) I<インスタンスメソッド>。 OFFSET番めの書記素クラスタの最初の文字の行分割クラス (L 参照) を返す。 OFFSET を指定しないと、次の位置の書記素クラスタの情報を返す。 B<注>: lbc() を使ってほしい。 =item lbclass_ext ([OFFSET]) I<インスタンスメソッド>。 OFFSET番めの書記素クラスタの最後の書記素エキステンダの行分割クラス (L 参照) を返す。 書記素エキステンダがないか、またはクラスが CM の場合は、lbclass() の値を返す。 B<注>: lbcext() を使ってほしい。 =back =end comment =head3 その他 =over 4 =item lbc I<インスタンスメソッド>。 最初の書記素クラスタの最初の文字の行分割クラス (L 参照) を返す。 =item lbcext I<インスタンスメソッド>。 最後の書記素クラスタの最後の書記素エキステンダの行分割クラス (L 参照) を返す。 書記素エキステンダがないか、またはクラスが CM の場合は、 最後の書記素基底の行分割クラスを返す。 =back =head1 CAVEATS =over 4 =item * 書記素クラスタを「書記素」と呼ぶべきではない (ラリーはそう呼ぶが)。 =item * Perl の 5.10.1 版あたりでは、Unicode::GCString オブジェクトから Unicode 文字列への暗黙の変換が C<"utf8_mg_pos_cache_update"> キャッシュを混乱させることがある。 たとえば、つぎのように $sub = substr($gcstring, $i, $j); するかわりに、つぎのようにするとよい。 $sub = substr("$gcstring", $i, $j); $sub = substr($gcstring->as_string, $i, $j); =item * このモジュールでは「初期の」書記素クラスタ境界判別アルゴリズムを実装している。 手直し〔tailoring〕の機構にはまだ対応していない。 =back =head1 VERSION $VERSION 変数を参照してほしい。 =head2 非互換な変更 =over 4 =item 2013.10 =over 4 =item * new() メソッドは非Unicode文字列を引数に取れるようになった。 その場合、文字列をiso-8859-1 (Latin 1) キャラクタセットで復号する。 以前のリリースでは、このメソッドに非Unicodeを入力すると死ぬようになっていた。 =back =back =head1 SEE ALSO [UAX #29] Mark Davis (ed.) (2009-2013). I, Revisions 15-23. L. =head1 AUTHOR Hatuka*nezumi - IKEDA Soji =head1 COPYRIGHT Copyright (C) 2009-2013 Hatuka*nezumi - IKEDA Soji. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/POD2/JA/Unicode/LineBreak.pod000066400000000000000000000733331273566223400264250ustar00rootroot00000000000000=encoding utf-8 =head1 NAME Unicode::LineBreak~[ja] - UAX #14 Unicode 行分割アルゴリズム =head1 SYNOPSIS use Unicode::LineBreak; $lb = Unicode::LineBreak->new(); $broken = $lb->break($string); =head1 DESCRIPTION Unicode::LineBreak は、Unicode 標準の附属書14 [UAX #14] で述べる Unicode 行分割アルゴリズムを実行する。 分割位置を決定する際に、附属書11 [UAX #11] で定義される East_Asian_Width 参考特性も考慮する。 =head2 用語 便宜的に以下の用語を使う。 B<強制分割>〔mandatory break〕は、基本規則で定められており、周囲の文字に関係なく義務的に実行される行分割動作。 B<任意分割>は、基本規則で認められており、ユーザが実行すると決めた場合に行われる行分割動作。 [UAX #14] で定義される任意分割にはB<直接分割>〔direct break〕とB<間接分割>〔indirect break〕とがある。 B<音素文字的な文字>〔alphabetic characters〕は、通常、他の文字が分割の機会を与えないかぎり、文字同士の間で行分割できない文字。 B<表語文字的な文字>〔ideographic characters〕は、通常、その前後で行分割できる文字。 [UAX #14] では音素文字的な文字のほとんどを AL に、表語文字的な文字のほとんどを ID に分類している (これらの用語は文字学の観点からすれば不正確である)。 若干の用字系では、個々の文字からは分割位置が明確にならないため、辞書による発見的方法を用いる。 文字列のB<桁数>は、文字列に含まれる文字の数と等しいとはかぎらない。 個々の文字はB<広い>〔wide〕か、B<狭い>〔narrow〕か、前進を伴わない〔nonspacing〕かのいずれかであり、各々 2 桁、1 桁、0 桁を占める。 若干の文字は、使われる文脈によって広くも狭くもなり得る。 カスタマイズによって、文字はより多様な幅を持ちうる。 =head1 PUBLIC INTERFACE =head2 行の分割 =over 4 =item new ([KEY => VALUE, ...]) I<コンストラクタ>。 KEY => VALUE の対については L を参照。 =item break (STRING) I<インスタンスメソッド>。 Unicode 文字列 STRING を分割し、それを返す。 配列コンテクストでは、結果の各行の配列を返す。 =item break_partial (STRING) I<インスタンスメソッド>。 break() と同じだが、文字列を少しずつ追加して入力する場合。 入力が完了したことを示すには、STRING 引数に C を与える。 =item config (KEY) =item config (KEY => VALUE, ...) I<インスタンスメソッド>。 設定を取得または変更する。 KEY => VALUE の対については L を参照。 =item copy I<コピーコンストラクタ>。 オブジェクトインスタンスの複製をつくる。 =begin comment =item reset I. =end comment =back =head2 情報の取得 =over 4 =item breakingRule (BEFORESTR, AFTERSTR) I<インスタンスメソッド>。 文字列 BEFORESTR と AFTERSTR の間での行分割動作を得る。 返値については L を参照。 B<注>: このメソッドは、行分割のおおまかな動作を表す値を返すにすぎない。 実際のテキストを行折りするには、break() 等のメソッドを使ってほしい。 =item context ([Charset => CHARSET], [Language => LANGUAGE]) I<関数>。 キャラクタセット CHARSET および言語コード LANGUAGE から、それを使う言語/地域の文脈を得る。 =back =begin comment =head3 廃止予定のメソッド =over 4 =item lbrule (BEFORE, AFTER) I<インスタンスメソッド>。 分類 BEFORE と分類 AFTER の間での行分割動作を得る。 返値については L を参照。 B<注>: このメソッドは、行分割のおおまかな動作を表す値を返すにすぎない。 B<注>: breakingRule() を使ってほしい。 =item strsize (LEN, PRE, SPC, STR) I<インスタンスメソッド>。 [UAX #11] で定義された文字幅に基づいて、Unicode 文字列 PRE.SPC.STR のI<桁数>を算出する。 B<注>: L を使ってほしい。 =back =end comment =head2 オプション L、L の両メソッドには以下の対を指定できる。 桁数の算出 ([B])、書記素クラスタ分節 ([B]) (L も参照)、行分割動作 ([B]) に影響するものがある。 =over 4 =item BreakIndent => C<"YES"> | C<"NO"> [B] 行頭の SPACE の並び (インデント) の後では常に分割を許す。 [UAX #14] は SPACE のこのような用法を考慮していない。 初期値は C<"YES">。 B<注>: このオプションはリリース 1.011 で導入された。 =item CharMax => NUMBER [B] 行に含みうる最大の文字数。行末の空白文字と改行の文字列を除く。 文字数は一般に行の長さを表さないことに注意。 初期値は C<998>。 C<0> にはできない。 =item ColMin => NUMBER [B] 任意分割された行の、改行の文字列と行末の空白文字を含めない最小桁数。 初期値は C<0>。 =item ColMax => NUMBER [B] 行の、改行の文字列と行末の空白文字を含めない最大桁数。つまり、行の最大長。 初期値は C<76>。 =back L オプションおよび L も参照。 =over 4 =item ComplexBreaking => C<"YES"> | C<"NO"> [B] 東南アジアの複雑な文脈で、発見的な行折りを行う。 初期値は、東南アジアの表記体系での単語分節が有効なら C<"YES">。 =item Context => CONTEXT [B][B] 言語/地域の文脈を指定する。 現在使える文脈は C<"EASTASIAN"> か C<"NONEASTASIAN">。 初期の文脈は C<"NONEASTASIAN">。 C<"EASTASIAN"> 文脈では、East_Asian_Width 特性が曖昧 (A) であれば「広い」文字とみなし、行分割特性が AI であれば表語文字的 (ID) とみなす。 C<"NONEASTASIAN"> 文脈では、East_Asian_Width 特性が曖昧 (A) であれば「狭い」文字とみなし、行分割特性が AI であれば音素文字的 (AL) とみなす。 =item EAWidth => C<[> ORD C<=E> PROPERTY C<]> =item EAWidth => C [B] 個々の文字の East_Asian_Width 特性を手直しする。 ORD は文字の UCS インデクス値か、それらの配列への参照。 PROPERTY は East_Asian_Width 特性値か拡張値のいずれか (L を参照)。 このオプションは複数回指定できる。 C を指定すると、それまでの手直しをすべて取り消す。 初期値では、East_Asian_width 特性の手直しはしない。 L も参照。 =item Format => METHOD [B] 分割した行を整形する方法を指定する。 =over 4 =item C<"SIMPLE"> 初期の方法。 任意分割の位置に改行を挿入するだけ。 =item C<"NEWLINE"> L オプションで指定したもので改行を置き換える。 改行の前とテキスト終端の空白文字を除去する。 テキスト終端に改行がなければ追加する。 =item C<"TRIM"> 任意分割の位置に改行を挿入する。 改行の前の空白文字を除去する。 =item C なにもしない (改行の挿入も)。 =item サブルーチンへの参照 L を参照。 =back =item HangulAsAL => C<"YES"> | C<"NO"> [B] ハングル音節とハングル連結チャモ〔conjoining jamo〕を音素文字的な文字 (AL) と扱う。 初期値は C<"NO">。 =item LBClass => C<[> ORD C<=E> CLASS C<]> =item LBClass => C [B][B] 個々の文字の行分割特性 (分類) を手直しする。 ORD は文字の UCS インデクス値か、それらの配列への参照。 CLASS は行分割特性値のいずれか (L を参照)。 このオプションは複数回指定できる。 C を指定すると、それまでの手直しをすべて取り消す。 初期値では、行分割特性の手直しはしない。 L も参照。 =item LegacyCM => C<"YES"> | C<"NO"> [B][B] 前に空白文字がついた結合文字を単独の結合文字 (ID) と扱う。 Unicode 5.0 版からは、空白文字のこのような使いかたは推奨されない。 初期値は C<"YES">。 =item Newline => STRING [B] 改行の文字列とする Unicode 文字列。 初期値は C<"\n">。 =item Prep => METHOD [B] ユーザ定義の行分割動作を追加する。 このオプションは複数回指定できる。 METHOD には以下のものを指定できる。 =over 4 =item C<"NONBREAKURI"> URI を分割しない。 =item C<"BREAKURI"> URI を、印刷物に適した規則で分割する。 詳しくは [CMOS] の 6.17 節と 17.11 節を参照。 =item C<[> REGEX, SUBREF C<]> 正規表現 REGEX にマッチする文字列を、SUBREF で参照されるサブルーチンで分割する。 詳細は L を参照。 =item C それまでに追加した動作をすべて取り消す。 =back =item Sizing => METHOD [B] 文字列の長さを算出する方法を指定する。 以下のオプションが使える。 =over 4 =item C<"UAX11"> 初期の方法。 組み込みの文字データベースによって文字の桁数を算出する。 =item C 文字列に含まれる書記素クラスタ (L 参照) の数を返す。 =item サブルーチンへの参照 L を参照。 =back L、L、L オプションも参照。 =item Urgent => METHOD [B] 長すぎる行の扱いかたを指定する。 以下のオプションが使える。 =over 4 =item C<"CROAK"> エラーメッセージを出力して死ぬ。 =item C<"FORCE"> 長すぎる文字列を無理やり分割する。 =item C 初期の方法。 長すぎる文字列も分割しない。 =item サブルーチンへの参照 L を参照。 =back =item ViramaAsJoiner => C<"YES"> | C<"NO"> [B] ヴィラーマ記号 (ヒンディ語では「ハラント」、クメール文字での「脚」) とそれに続く字とを分離しない。 初期値は C<"YES">。 B<注>: このオプションはリリース 2011.001_29 で導入された。 以前のリリースでは C<"NO"> に固定であった。 これは、[UAX #29] で定義する「初期の」書記素クラスタには含まれない仕様である。 =back =begin comment =head3 旧式なオプション =over 4 =item TailorEA => C<[> ORD C<=E> PROPERTY, ... C<]> L の旧式な形式。 =item TailorLB => C<[> ORD C<=E> CLASS, ... C<]> L の旧式な形式。 =item UserBreaking => C<[>METHOD, ...C<]> L の旧式な形式。 =back =end comment =head2 定数 =over 4 =item C, C, C, C, C, C [UAX #11] で定義される 6 つの East_Asian_Width 特性値。 狭 (Na)、中立 (N)、曖昧 (A)、広 (W)、半角 (H)、全角 (F)。 =item C 前進を伴わない文字の East_Asian_Width 特性の値。 B<注>: この「前進を伴わない」値は当モジュールによる拡張であり、 [UAX #11] の一部ではない。 =begin comment C and C: Undocumented. 以前のリリースには C しかなく、C と C は リリース 2012.10 で追加された。 =end comment =item C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C [UAX #14] で定義される 40 の行分割特性値 (分類)。 B<注>: 特性値 CP はUnicode 5.2.0版で導入された。 特性値 HL と CJ はUnicode 6.1.0版で導入された。 特性値 RI は Unicode 6.2.0版で導入された。 =item C, C, C, C 行分割動作を表す 4 つの値。 強制分割。直接分割も間接分割も認める。間接分割を認めるが直接分割は禁ずる。分割を禁ずる。 =item C 東南アジアの表記体系のための単語分節機能が有効かどうかを示すフラグ。 この機能が有効になっていれば、空でない文字列。 そうでなければ C。 B<注>: 現リリースでは現代タイ語のタイ文字にのみ対応している。 =item C このモジュールが参照する Unicode 標準の版を示す文字列。 =back =head1 CUSTOMIZATION =head2 行の整形 L オプションにサブルーチンへの参照を指定する場合、そのサブルーチンは 3 つの引数を取らなければならない。 $修正後 = &サブルーチン(SELF, EVENT, STR); SELF は Unicode::LineBreak オブジェクト、EVENT はサブルーチンが呼ばれた文脈を表す文字列、STR は分割位置の前または後の Unicode 文字列の断片。 EVENT |駆動の契機 |STR ----------------------------------------------------------------- "sot" |テキスト先頭 |最初の行の断片 "sop" |強制分割の後 |次の行の断片 "sol" |任意分割の後 |続きの行の断片 "" |分割の直前 |行全体 (終端の空白文字を除く) "eol" |任意分割 |分割位置の前の空白文字 "eop" |強制分割 |改行とその前の空白文字 "eot" |テキスト終端 |テキスト終端の空白文字 (と改行) ----------------------------------------------------------------- サブルーチンは、テキストの断片を修正して返さなければならない。なにも修正しなかったことを示すには、C を返せばよい。 なお、C<"sot">、C<"sop">、C<"sol"> の文脈での修正はその後の分割位置の決定に影響するが、ほかの文脈での修正は影響しない。 B<注意>: 文字列の引数は実際には書記素クラスタ列である。 L 参照。 たとえば次のコードは、行末の空白を取り除いて行折りをする。 sub fmt { if ($_[1] =~ /^eo/) { return "\n"; } return undef; } my $lb = Unicode::LineBreak->new(Format => \&fmt); $output = $lb->break($text); =head2 ユーザ定義の行分割動作 任意分割によって生じる行が CharMax、ColMax、ColMin のいずれかの制限を超えると見込まれるときは、引き続く文字列に対してB<緊急分割>を実行できる。 L オプションにサブルーチンへの参照を指定する場合、そのサブルーチンは 2 つの引数を取らなければならない。 @分割後 = &サブルーチン(SELF, STR); SELF は Unicode::LineBreak オブジェクト、STR は分割すべき Unicode 文字列。 サブルーチンは、文字列 STR を分割した結果の配列を返さなければならない。 B<注意>: 文字列の引数は実際には書記素クラスタ列である。 L 参照。 たとえば次のコードは、若干の化学物質 (チチンのような) の名称にハイフンを挿入し、行折りできるようにする。 sub hyphenize { return map {$_ =~ s/yl$/yl-/; $_} split /(\w+?yl(?=\w))/, $_[1]; } my $lb = Unicode::LineBreak->new(Urgent => \&hyphenize); $output = $lb->break("Methionylthreonylthreonylglutaminylarginyl..."); L オプションに [REGEX, SUBREF] の配列参照を指定する場合、サブルーチンは 2 つの引数を取らなければならない。 @分割後 = &サブルーチン(SELF, STR); SELF は Unicode::LineBreak オブジェクト、STR は REGEX にマッチする分割すべき Unicode 文字列。 サブルーチンは、文字列 STR を分割した結果の配列を返さなければならない。 たとえば次のコードは、HTTP URL を [CMOS] の規則を用いて分割する。 my $url = qr{http://[\x21-\x7E]+}i; sub breakurl { my $self = shift; my $str = shift; return split m{(?<=[/]) (?=[^/]) | (?<=[^-.]) (?=[-~.,_?\#%=&]) | (?<=[=&]) (?=.)}x, $str; } my $lb = Unicode::LineBreak->new(Prep => [$url, \&breakurl]); $output = $lb->break($string); =head3 状態の保存 Unicode::LineBreak オブジェクトはハッシュ参照としてふるまう。 任意の要素を、オブジェクトの存在期間中保存できる。 たとえば次のコードは、段落を空行で分ける。 sub paraformat { my $self = shift; my $action = shift; my $str = shift; if ($action eq 'sot' or $action eq 'sop') { $self->{'line'} = ''; } elsif ($action eq '') { $self->{'line'} = $str; } elsif ($action eq 'eol') { return "\n"; } elsif ($action eq 'eop') { if (length $self->{'line'}) { return "\n\n"; } else { return "\n"; } } elsif ($action eq 'eot') { return "\n"; } return undef; } my $lb = Unicode::LineBreak->new(Format => \¶format); $output = $lb->break($string); =head2 文字列長の算出 L オプションにサブルーチンへの参照を指定する場合、そのサブルーチンは 5 つの引数を取らなければならない。 $桁数 = &サブルーチン(SELF, LEN, PRE, SPC, STR); SELF は Unicode::LineBreak オブジェクト、LEN は先行する文字列の長さ、PRE は先行する Unicode 文字列、SPC は追加される空白文字、STR は処理する Unicode 文字列。 サブルーチンは C の桁数を算出して返さなければならない。 桁数は整数でなくてもよい。桁数の単位は随意に選べるが、L オプションおよび L オプションのそれと一致させなければならない。 B<注意>: 文字列の引数は実際には書記素クラスタ列である。 L 参照。 たとえば次のコードは、行に 8 桁ごとのタブストップがあるものとして処理する。 sub tabbedsizing { my ($self, $cols, $pre, $spc, $str) = @_; my $spcstr = $spc.$str; while ($spcstr->lbc == LB_SP) { my $c = $spcstr->item(0); if ($c eq "\t") { $cols += 8 - $cols % 8; } else { $cols += $c->columns; } $spcstr = $spcstr->substr(1); } $cols += $spcstr->columns; return $cols; }; my $lb = Unicode::LineBreak->new(LBClass => [ord("\t") => LB_SP], Sizing => \&tabbedsizing); $output = $lb->break($string); =head2 文字の特性の手直し L オプションおよび L オプションで個々の文字の行分割特性 (分類) や East_Asian_Width 特性を手直しできる。その際に便利な定数をいくつか定義してある。 =head3 行分割特性 =head4 仮名などの行頭禁則文字 初期値では、若干の仮名や仮名に準ずるものを行頭禁則文字 (NS または CJ) と扱う。 以下の対を L オプションに指定すれば、これらの文字を通常の表語文字的な文字 (ID) と扱える。 =over 4 =item C LB_ID> 下記の文字すべて。 =item C LB_ID> 表語文字的な繰り返し記号。 U+3005 繰返し記号、U+303B ゆすり点、U+309D 平仮名繰返し記号、U+309E 平仮名繰返し記号 (濁点)、U+30FD 片仮名繰返し記号、U+30FE 片仮名繰返し記号 (濁点)。 注。仮名ではないものもある。 =item C LB_ID> =item C LB_ID> 小書き仮名。 小書き平仮名 U+3041 ぁ, U+3043 ぃ, U+3045 ぅ, U+3047 ぇ, U+3049 ぉ, U+3063 っ, U+3083 ゃ, U+3085 ゅ, U+3087 ょ, U+308E ゎ, U+3095 E<0x3095>, U+3096 E<0x3096>。 小書き片仮名 U+30A1 ァ, U+30A3 ィ, U+30A5 ゥ, U+30A7 ェ, U+30A9 ォ, U+30C3 ッ, U+30E3 ャ, U+30E5 ュ, U+30E7 ョ, U+30EE ヮ, U+30F5 ヵ, U+30F6 ヶ。 片仮名表音拡張 U+31F0 E<0x31F0> - U+31FF E<0x31FF>。 小書き片仮名 (代替名称) U+FF67 E<0xFF67> - U+FF6F E<0xFF6F>。 長音記号。 U+30FC 長音記号、U+FF70 長音記号 (代替名称)。 注。これらの文字は行頭禁則文字と扱われることも、通常の表語文字的な文字と扱われることもある。[JIS X 4051] 6.1.1、[JLREQ] 3.1.7 や [UAX14] を参照。 注。U+3095 E<0x3095>, U+3096 E<0x3096>, U+30F5 ヵ, U+30F6 ヶ は仮名ではないとされる。 =item C LB_ID> U+303C ます記号。 注。この文字は仮名ではないが、通常 C<"ます"> や C<"マス"> の略記として用いられる。 注。この文字は [UAX #14] では行頭禁則文字 (NS) に分類されるが、[JIS X 4051] や [JLREQ] では文字クラス (13) や cl-19 (ID に相当) に分類される。 =back =head4 曖昧な引用符 初期値では、若干の記号を曖昧な引用符 (QU) と扱う。 =over 4 =item C LB_OP, FORWARD_QUOTES() =E LB_CL> ある言語 (オランダ語、英語、イタリア語、ポルトガル語、スペイン語、トルコ語、 および東アジアの多くの言語) では、開き記号に 9 が回転した形状の引用符 (E<0x2018> E<0x201C>) を、閉じ記号に 9 の形状の引用符 (E<0x2019> E<0x201D>) を用いる。 =item C LB_OP, BACKWARD_QUOTES() =E LB_CL> ほかの言語 (チェコ語、ドイツ語、スロヴァク語) では、9 の形状の引用符 (E<0x2019> E<0x201D>) を開き記号に、9 が回転した形状の引用符 (E<0x2018> E<0x201C>) を閉じ記号に用いる。 =item C LB_OP, FORWARD_GUILLEMETS() =E LB_CL> フランス語、ギリシャ語、ロシア語などでは、左向きのギュメ (E<0x00AB> E<0x2039>) を開き記号に、右向きのギュメ (E<0x00BB> E<0x203A>) を閉じ記号に用いる。 =item C LB_OP, BACKWARD_GUILLEMETS() =E LB_CL> ドイツ語やスロヴァク語では、右向きのギュメ (E<0x00BB> E<0x203A>) を開き記号に、左向きのギュメ (E<0x00AB> E<0x2039>) を閉じ記号に用いる。 =back デーン語、フィン語、ノルウェー語、スウェーデン語では、9 の形状の引用符や 右向きのギュメ (E<0x2019> E<0x201D> E<0x00BB> E<0x203A>) を開き記号にも閉じ記号にも用いる。 =head4 和字間隔 =over 4 =item C LB_BA> U+3000 和字間隔が行頭に来ないようにする。 これが初期の挙動である。 =item C LB_ID> 和字間隔が行頭に来ることがある。 Unicode 6.2以前はこれが初期の挙動であった。 =item C LB_SP> 和字間隔が行頭に来ず、行末でははみ出すようにする。 =back =head3 East_Asian_Width 特性 ラテン、ギリシア、キリルの各用字系では、特定の文字が曖昧 (A) の East_Asian_Width 特性を持っている。このため、こういった文字は C<"EASTASIAN"> 文脈で広い文字と扱われる。 C [ AMBIGUOUS_>*C<() =E EA_N ]> と指定することで、そのような文字を常に狭い文字と扱う。 =over 4 =item C EA_N> 下記の文字すべてを East_Asian_Width 特性 N (中立) の文字と扱う。 =item C EA_N> =item C EA_N> =item C EA_N> 曖昧 (A) の幅を持つキリル、ギリシア、ラテン用字系の文字を中立 (N) の文字と扱う。 =back いっぽう、東アジアの符号化文字集合に対する多くの実装でたびたび広い文字に描画されてきたにもかかわらず、Unicode 標準では全角 (F) の互換文字を持つがゆえに狭い (Na) 文字とされている文字が若干ある。L オプションに以下のように指定することで、これらの文字を C<"EASTASIAN"> 文脈で広い文字と扱える。 =over 4 =item C EA_A> U+00A2 セント記号、U+00A3 ポンド記号、U+00A5 円記号 (または元記号)、U+00A6 破断線、U+00AC 否定、U+00AF マクロン。 =back =head2 設定ファイル L メソッドおよび L メソッドのオプション引数の組み込み初期値は、 設定ファイルで上書きできる。 F。 詳細は F を読んでほしい。 =head1 BUGS バグやバグのような動作は、開発者に教えてください。 CPAN Request Tracker: L. =head1 VERSION $VERSION 変数を参照してほしい。 =head2 非互換な変更 =over 4 =item 2012.06 =over 4 =item * eawidth() メソッドを廃止した。 代わりに L が使えるかもしれない。 =item * lbclass() メソッドを廃止した。 L や L を使ってほしい。 =back =back =head2 標準への適合性 このモジュールで用いている文字の特性値は、Unicode 標準 8.0.0版による。 このモジュールでは、実装水準 UAX14-C2 を実装しているつもり。 =head1 IMPLEMENTATION NOTES =over 4 =item * 一部の表語文字的な文字を NS として扱うか ID として扱うかを選べる。 =item * ハングル音節およびハングル連結チャモを ID として扱うか AL として扱うかを選べる。 =item * AI に分類される文字を AL と ID のどちらに解決するかを選べる。 =item * CB に分類される文字は解決しない。 =item * CJ に分類される文字は常に NS に解決する。より柔軟な手直しの機構が提供される。 =item * 東南アジアの表記体系の単語分節に対応しない場合は、 SA に分類される文字は AL に解決する。 ただし、Grapheme_Cluster_Break 特性の値が Extend か SpacingMark である文字は CM に解決する。 =item * SG や XX に分類される文字は AL に解決する。 =item * 以下の UCS の範囲にあるコードポイントは、文字が割り当てられていなくても決まった特性値を持つ。 範囲 | UAX #14 | UAX #11 | 説明 ------------------------------------------------------------- U+20A0..U+20CF | PR [*1] | N [*2] | 通貨記号 U+3400..U+4DBF | ID | W | CJK漢字 U+4E00..U+9FFF | ID | W | CJK漢字 U+D800..U+DFFF | AL (SG) | N | サロゲート U+E000..U+F8FF | AL (XX) | F か N (A) | 私用領域 U+F900..U+FAFF | ID | W | CJK漢字 U+20000..U+2FFFD | ID | W | CJK漢字 U+30000..U+3FFFD | ID | W | 古漢字 U+F0000..U+FFFFD | AL (XX) | F か N (A) | 私用領域 U+100000..U+10FFFD | AL (XX) | F か N (A) | 私用領域 その他未割り当て | AL (XX) | N | 未割り当て、 | | | 予約、非文字 ------------------------------------------------------------- [*1] U+20A7 ペセタ記号 (PO)、U+20B6 トゥール・リーヴル記号 (PO)、U+20BB スカンディナヴィア・マルク記号 (PO)、U+20BE ラリ記号 (PO) を除く。 [*2] U+20A9 ウォン記号 (H)、U+20AC ユーロ記号 (F か N (A)) を 除く。 =item * 一般カテゴリ特性が Mn、Me、Cc、Cf、Zl、Zp のいずれかである文字は、前進を伴わない文字とみなす。 =back =head1 REFERENCES =over 4 =item [CMOS] I, 15th edition. University of Chicago Press, 2003. =item [JIS X 4051] JIS X 4051:2004 I<日本語文書の組版方法>. 日本規格協会, 2004. =item [JLREQ] 阿南康宏他. I<日本語組版処理の要件>, W3C 技術ノート 2012年4月3日. L. =begin comment =item [Kubota] 久保田智広 (2001-2002). 文字幅問題, I. L. =end comment =item [UAX #11] A. Freytag (ed.) (2008-2009). I, Revisions 17-19. L. =item [UAX #14] A. Freytag and A. Heninger (eds.) (2008-2015). I, Revisions 22-35. L. =item [UAX #29] Mark Davis (ed.) (2009-2013). I, Revisions 15-23. L. =back =head1 SEE ALSO L, L, L. =head1 AUTHOR Copyright (C) 2009-2013 Hatuka*nezumi - IKEDA Soji . This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Text/000077500000000000000000000000001273566223400224145ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Text/LineFold.pm000066400000000000000000000324131273566223400244510ustar00rootroot00000000000000#-*- perl -*- package Text::LineFold; require 5.008; =encoding utf-8 =head1 NAME Text::LineFold - Line Folding for Plain Text =head1 SYNOPSIS use Text::LineFold; $lf = Text::LineFold->new(); # Fold lines $folded = $lf->fold($string, 'PLAIN'); $indented = $lf->fold(' ' x 8, ' ' x 4, $string); # Unfold lines $unfolded = $lf->unfold($string, 'FIXED'); =head1 DESCRIPTION Text::LineFold folds or unfolds lines of plain text. As it mainly focuses on plain text e-mail messages, RFC 3676 flowed format is also supported. =cut ### Pragmas: use strict; use vars qw($VERSION @EXPORT_OK @ISA $Config); ### Exporting: use Exporter; ### Inheritance: our @ISA = qw(Exporter Unicode::LineBreak); ### Other modules: use Carp qw(croak carp); use Encode qw(is_utf8); use MIME::Charset; use Unicode::LineBreak qw(:all); ### Globals ### The package Version our $VERSION = '2016.00702'; ### Public Configuration Attributes our $Config = { ### %{$Unicode::LineBreak::Config}, Charset => 'UTF-8', Language => 'XX', OutputCharset => undef, TabSize => 8, }; ### Privates my %FORMAT_FUNCS = ( 'FIXED' => sub { my $self = shift; my $action = shift; my $str = shift; if ($action =~ /^so[tp]/) { $self->{_} = {}; $self->{_}->{'ColMax'} = $self->config('ColMax'); $self->config('ColMax' => 0) if $str =~ /^>/; } elsif ($action eq "") { $self->{_}->{line} = $str; } elsif ($action eq "eol") { return $self->config('Newline'); } elsif ($action =~ /^eo/) { if (length $self->{_}->{line} and $self->config('ColMax')) { $str = $self->config('Newline').$self->config('Newline'); } else { $str = $self->config('Newline'); } $self->config('ColMax' => $self->{_}->{'ColMax'}); delete $self->{_}; return $str; } undef; }, 'FLOWED' => sub { # RFC 3676 my $self = shift; my $action = shift; my $str = shift; if ($action eq 'sol') { if ($self->{_}->{prefix}) { return $self->{_}->{prefix}.' '.$str; } } elsif ($action =~ /^so/) { $self->{_} = {}; if ($str =~ /^(>+)/) { $self->{_}->{prefix} = $1; } else { $self->{_}->{prefix} = ''; } } elsif ($action eq "") { if ($str =~ /^(?: |From )/ or $str =~ /^>/ and !length $self->{_}->{prefix}) { return $self->{_}->{line} = ' ' . $str; } $self->{_}->{line} = $str; } elsif ($action eq 'eol') { $str = ' ' if length $str; return $str.' '.$self->config('Newline'); } elsif ($action =~ /^eo/) { if (length $self->{_}->{line} and !length $self->{_}->{prefix}) { $str = ' '.$self->config('Newline').$self->config('Newline'); } else { $str = $self->config('Newline'); } delete $self->{_}; return $str; } undef; }, 'PLAIN' => sub { return $_[0]->config('Newline') if $_[1] =~ /^eo/; undef; }, ); =head2 Public Interface =over 4 =item new ([KEY => VALUE, ...]) I. About KEY => VALUE pairs see config method. =back =cut sub new { my $class = shift; my $self = bless __PACKAGE__->SUPER::new(), $class; $self->config(@_); $self; } =over 4 =item $self->config (KEY) =item $self->config ([KEY => VAL, ...]) I. Get or update configuration. Following KEY => VALUE pairs may be specified. =over 4 =item Charset => CHARSET Character set that is used to encode string. It may be string or L object. Default is C<"UTF-8">. =item Language => LANGUAGE Along with Charset option, this may be used to define language/region context. Default is C<"XX">. See also L option. =item Newline => STRING String to be used for newline sequence. Default is C<"\n">. =item OutputCharset => CHARSET Character set that is used to encode result of fold()/unfold(). It may be string or L object. If a special value C<"_UNICODE_"> is specified, result will be Unicode string. Default is the value of Charset option. =item TabSize => NUMBER Column width of tab stops. When 0 is specified, tab stops are ignored. Default is 8. =item BreakIndent =item CharMax =item ColMax =item ColMin =item ComplexBreaking =item EAWidth =item HangulAsAL =item LBClass =item LegacyCM =item Prep =item Urgent See L. =back =back =cut sub config { my $self = shift; my @opts = qw{Charset Language OutputCharset TabSize}; my %opts = map { (uc $_ => $_) } @opts; my $newline = undef; # Get config. if (scalar @_ == 1) { if ($opts{uc $_[0]}) { return $self->{$opts{uc $_[0]}}; } return $self->SUPER::config($_[0]); } # Set config. my @o = (); my %params = @_; foreach my $k (keys %params) { my $v = $params{$k}; if ($opts{uc $k}) { $self->{$opts{uc $k}} = $v; } elsif (uc $k eq uc 'Newline') { $newline = $v; } else { push @o, $k => $v; } } $self->SUPER::config(@o) if scalar @o; # Character set and language assumed. if (ref $self->{Charset} eq 'MIME::Charset') { $self->{_charset} = $self->{Charset}; } else { $self->{Charset} ||= $Config->{Charset}; $self->{_charset} = MIME::Charset->new($self->{Charset}); } $self->{Charset} = $self->{_charset}->as_string; my $ocharset = uc($self->{OutputCharset} || $self->{Charset}); $ocharset = MIME::Charset->new($ocharset) unless ref $ocharset eq 'MIME::Charset' or $ocharset eq '_UNICODE_'; unless ($ocharset eq '_UNICODE_') { $self->{_charset}->encoder($ocharset); $self->{OutputCharset} = $ocharset->as_string; } $self->{Language} = uc($self->{Language} || $Config->{Language}); ## Context $self->SUPER::config(Context => context(Charset => $self->{Charset}, Language => $self->{Language})); ## Set sizing method. $self->SUPER::config(Sizing => sub { my ($self, $cols, $pre, $spc, $str) = @_; my $tabsize = $self->{TabSize}; my $spcstr = $spc.$str; $spcstr->pos(0); while (!$spcstr->eos and $spcstr->item->lbc == LB_SP) { my $c = $spcstr->next; if ($c eq "\t") { $cols += $tabsize - $cols % $tabsize if $tabsize; } else { $cols += $c->columns; } } return $cols + $spcstr->substr($spcstr->pos)->columns; }); ## Classify horizontal tab as line breaking class SP. $self->SUPER::config(LBClass => [ord("\t") => LB_SP]); ## Tab size if (defined $self->{TabSize}) { croak "Invalid TabSize option" unless $self->{TabSize} =~ /^\d+$/; $self->{TabSize} += 0; } else { $self->{TabSize} = $Config->{TabSize}; } ## Newline if (defined $newline) { $newline = $self->{_charset}->decode($newline) unless is_utf8($newline); $self->SUPER::config(Newline => $newline); } } =over 4 =item $self->fold (STRING, [METHOD]) =item $self->fold (INITIAL_TAB, SUBSEQUENT_TAB, STRING, ...) I. fold() folds lines of string STRING and returns it. Surplus SPACEs and horizontal tabs at end of line are removed, newline sequences are replaced by that specified by Newline option and newline is appended at end of text if it does not exist. Horizontal tabs are treated as tab stops according to TabSize option. By the first style, following options may be specified for METHOD argument. =over 4 =item C<"FIXED"> Lines preceded by C<"E"> won't be folded. Paragraphs are separated by empty line. =item C<"FLOWED"> C<"Format=Flowed; DelSp=Yes"> formatting defined by RFC 3676. =item C<"PLAIN"> Default method. All lines are folded. =back Second style is similar to L. All lines are folded. INITIAL_TAB is inserted at beginning of paragraphs and SUBSEQUENT_TAB at beginning of other broken lines. =back =cut # Special breaking characters: VT, FF, NEL, LS, PS my $special_break = qr/([\x{000B}\x{000C}\x{0085}\x{2028}\x{2029}])/os; sub fold { my $self = shift; my $str; if (2 < scalar @_) { my $initial_tab = shift || ''; $initial_tab = $self->{_charset}->decode($initial_tab) unless is_utf8($initial_tab); my $subsequent_tab = shift || ''; $subsequent_tab = $self->{_charset}->decode($subsequent_tab) unless is_utf8($subsequent_tab); my @str = @_; ## Decode and concat strings. $str = shift @str; $str = $self->{_charset}->decode($str) unless is_utf8($str); foreach my $s (@str) { next unless defined $s and length $s; $s = $self->{_charset}->decode($s) unless is_utf8($s); unless (length $str) { $str = $s; } elsif ($str =~ /(\s|$special_break)$/ or $s =~ /^(\s|$special_break)/) { $str .= $s; } else { $str .= ' ' if $self->breakingRule($str, $s) == INDIRECT; $str .= $s; } } ## Set format method. $self->SUPER::config(Format => sub { my $self = shift; my $event = shift; my $str = shift; if ($event =~ /^eo/) { return $self->config('Newline'); } if ($event =~ /^so[tp]/) { return $initial_tab.$str; } if ($event eq 'sol') { return $subsequent_tab.$str; } undef; }); } else { $str = shift; my $method = uc(shift || ''); return '' unless defined $str and length $str; ## Decode string. $str = $self->{_charset}->decode($str) unless is_utf8($str); ## Set format method. $self->SUPER::config(Format => $FORMAT_FUNCS{$method} || $FORMAT_FUNCS{'PLAIN'}); } ## Do folding. my $result = ''; foreach my $s (split $special_break, $str) { if ($s =~ $special_break) { $result .= $s; } else { $result .= $self->break($str); } } ## Encode result. if ($self->{OutputCharset} eq '_UNICODE_') { return $result; } else { return $self->{_charset}->encode($result); } } =over 4 =item $self->unfold (STRING, METHOD) Conjunct folded paragraphs of string STRING and returns it. Following options may be specified for METHOD argument. =over 4 =item C<"FIXED"> Default method. Lines preceded by C<"E"> won't be conjuncted. Treat empty line as paragraph separator. =item C<"FLOWED"> Unfold C<"Format=Flowed; DelSp=Yes"> formatting defined by RFC 3676. =item C<"FLOWEDSP"> Unfold C<"Format=Flowed; DelSp=No"> formatting defined by RFC 3676. =begin comment =item C<"OBSFLOWED"> Unfold C<"Format=Flowed> formatting defined by (obsoleted) RFC 2646 as well as possible. =end comment =back =back =cut sub unfold { my $self = shift; my $str = shift; return '' unless defined $str and length $str; ## Get format method. my $method = uc(shift || 'FIXED'); $method = 'FIXED' unless $method =~ /^(?:FIXED|FLOWED|FLOWEDSP|OBSFLOWED)$/; my $delsp = $method eq 'FLOWED'; ## Decode string and canonizalize newline. $str = $self->{_charset}->decode($str) unless is_utf8($str); $str =~ s/\r\n|\r/\n/g; ## Do unfolding. my $result = ''; foreach my $s (split $special_break, $str) { if ($s eq '') { next; } elsif ($s =~ $special_break) { $result .= $s; next; } elsif ($method eq 'FIXED') { pos($s) = 0; while ($s !~ /\G\z/cg) { if ($s =~ /\G\n/cg) { $result .= $self->config('Newline'); } elsif ($s =~ /\G(.+)\n\n/cg) { $result .= $1.$self->config('Newline'); } elsif ($s =~ /\G(>.*)\n/cg) { $result .= $1.$self->config('Newline'); } elsif ($s =~ /\G(.+)\n(?=>)/cg) { $result .= $1.$self->config('Newline'); } elsif ($s =~ /\G(.+?)( *)\n(?=(.+))/cg) { my ($l, $s, $n) = ($1, $2, $3); $result .= $l; if ($n =~ /^ /) { $result .= $self->config('Newline'); } elsif (length $s) { $result .= $s; } elsif (length $l) { $result .= ' ' if $self->breakingRule($l, $n) == INDIRECT; } } elsif ($s =~ /\G(.+)\n/cg) { $result .= $1.$self->config('Newline'); } elsif ($s =~ /\G(.+)/cg) { $result .= $1.$self->config('Newline'); last; } } } elsif ($method eq 'FLOWED' or $method eq 'FLOWEDSP' or $method eq 'OBSFLOWED') { my $prefix = undef; pos($s) = 0; while ($s !~ /\G\z/cg) { if ($s =~ /\G(>+) ?(.*?)( ?)\n/cg) { my ($p, $l, $s) = ($1, $2, $3); unless (defined $prefix) { $result .= $p.' '.$l; } elsif ($p ne $prefix) { $result .= $self->config('Newline'); $result .= $p.' '.$l; } else { $result .= $l; } unless (length $s) { $result .= $self->config('Newline'); $prefix = undef; } else { $prefix = $p; $result .= $s unless $delsp; } } elsif ($s =~ /\G ?(.*?)( ?)\n/cg) { my ($l, $s) = ($1, $2); unless (defined $prefix) { $result .= $l; } elsif ('' ne $prefix) { $result .= $self->config('Newline'); $result .= $l; } else { $result .= $l; } unless (length $s) { $result .= $self->config('Newline'); $prefix = undef; } else { $result .= $s unless $delsp; $prefix = ''; } } elsif ($s =~ /\G ?(.*)/cg) { $result .= $1.$self->config('Newline'); last; } } } } ## Encode result. if ($self->{OutputCharset} eq '_UNICODE_') { return $result; } else { return $self->{_charset}->encode($result); } } =head1 BUGS Please report bugs or buggy behaviors to developer. CPAN Request Tracker: L. =head1 VERSION Consult $VERSION variable. =head1 SEE ALSO L, L. =head1 AUTHOR Copyright (C) 2009-2012 Hatuka*nezumi - IKEDA Soji . This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Unicode/000077500000000000000000000000001273566223400230565ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Unicode/GCString.pm000066400000000000000000000016401273566223400250750ustar00rootroot00000000000000#-*-perl-*- package Unicode::GCString; require 5.008; =encoding utf-8 =cut ### Pragmas: use strict; use warnings; use vars qw($VERSION @EXPORT_OK @ISA); ### Exporting: use Exporter; our @EXPORT_OK = qw(); our %EXPORT_TAGS = ('all' => [@EXPORT_OK]); ### Inheritance: our @ISA = qw(Exporter); ### Other modules: use Unicode::LineBreak; ### Globals # The package version our $VERSION = '2013.10'; use overload '@{}' => \&as_arrayref, '${}' => \&as_scalarref, '""' => \&as_string, '.' => \&concat, #XXX'.=' => \&concat, #FIXME:segfault 'cmp' => \&cmp, '<>' => \&next, ; sub new { my $class = shift; my $self; if (scalar @_ <= 2) { $self = __PACKAGE__->_new(@_); } else { my $str = shift; my $lb = Unicode::LineBreak->new(@_); $self = __PACKAGE__->_new($str, $lb); } bless $self, $class; } sub as_arrayref { my @a = shift->as_array; return \@a; } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Unicode/GCString.pod000066400000000000000000000143231273566223400252450ustar00rootroot00000000000000=encoding utf-8 =head1 NAME Unicode::GCString - String as Sequence of UAX #29 Grapheme Clusters =head1 SYNOPSIS use Unicode::GCString; $gcstring = Unicode::GCString->new($string); =head1 DESCRIPTION Unicode::GCString treats Unicode string as a sequence of I defined by Unicode Standard Annex #29 [UAX #29]. B is a sequence of Unicode character(s) that consists of one B and optional B and/or B<“prepend” character>. It is close in that people consider as I. =head2 Public Interface =head3 Constructors =over 4 =item new (STRING, [KEY => VALUE, ...]) =item new (STRING, [LINEBREAK]) I. Create new grapheme cluster string (Unicode::GCString object) from Unicode string STRING. About optional KEY => VALUE pairs see L. On second form, L object LINEBREAK controls breaking features. B: The first form was introduced by release 2012.10. =item copy I. Create a copy of grapheme cluster string. Next position of new string is set at beginning. =back =head3 Sizes =over 4 =item chars I. Returns number of Unicode characters grapheme cluster string includes, i.e. length as Unicode string. =item columns I. Returns total number of columns of grapheme clusters defined by built-in character database. For more details see L. =item length I. Returns number of grapheme clusters contained in grapheme cluster string. =back =head3 Operations as String =over 4 =item as_string =item C<">OBJECTC<"> I. Convert grapheme cluster string to Unicode string explicitly. =item cmp (STRING) =item STRING C STRING I. Compare strings. There are no oddities. One of each STRING may be Unicode string. =item concat (STRING) =item STRING C<.> STRING I. Concatenate STRINGs. One of each STRING may be Unicode string. Note that number of columns (see columns()) or grapheme clusters (see length()) of resulting string is not always equal to sum of both strings. Next position of new string is that set on the left value. =item join ([STRING, ...]) I. Join STRINGs inserting grapheme cluster string. Any of STRINGs may be Unicode string. =item substr (OFFSET, [LENGTH, [REPLACEMENT]]) I. Returns substring of grapheme cluster string. OFFSET and LENGTH are based on grapheme clusters. If REPLACEMENT is specified, substring is replaced by it. REPLACEMENT may be Unicode string. Note: This method cannot return the lvalue, unlike built-in substr(). =back =head3 Operations as Sequence of Grapheme Clusters =over 4 =item as_array =item C<@{>OBJECTC<}> =item as_arrayref I. Convert grapheme cluster string to an array of grapheme clusters. =item eos I. Test if current position is at end of grapheme cluster string. =item item ([OFFSET]) I. Returns OFFSET-th grapheme cluster. If OFFSET was not specified, returns next grapheme cluster. =item next =item C>OBJECTC> I, iterative. Returns next grapheme cluster and increment next position. =item pos ([OFFSET]) I. If optional OFFSET is specified, set next position by it. Returns next position of grapheme cluster string. =back =begin comment =head4 Methods planned to be deprecated =over 4 =item flag ([OFFSET, [VALUE]]) I. Get or set flag value of OFFEST-th grapheme cluster. If OFFSET was not specified, returns flag value of next grapheme cluster. Flag value is an non-zero integer not greater than 255 and initially is 0. Predefined flags are: =over 4 =item Unicode::LineBreak::ALLOW_BEFORE Allow line breaking just before this grapheme cluster. =item Unicode::LineBreak::PROHIBIT_BEFORE Prohibit line breaking just before this grapheme cluster. =back =item lbclass ([OFFSET]) I. Returns Line Breaking Class (See L) of the first character of OFFSET-th grapheme cluster. If OFFSET was not specified, returns class of next grapheme cluster. B: Use lbc(). =item lbclass_ext ([OFFSET]) I. Returns Line Breaking Class (See L) of the last grapheme extender of OFFSET-th grapheme cluster. If there are no grapheme extenders or its class is CM, value of lbclass() is returned. B: Use lbcext(). =back =end comment =head3 Miscelaneous =over 4 =item lbc I. Returns Line Breaking Class (See L) of the first character of first grapheme cluster. =item lbcext I. Returns Line Breaking Class (See L) of the last grapheme extender of last grapheme cluster. If there are no grapheme extenders or its class is CM, value of last grapheme base will be returned. =back =head1 CAVEATS =over 4 =item * The grapheme cluster should not be referred to as "grapheme" even though Larry does. =item * On Perl around 5.10.1, implicit conversion from Unicode::GCString object to Unicode string sometimes let C<"utf8_mg_pos_cache_update"> cache be confused. For example, instead of doing $sub = substr($gcstring, $i, $j); do $sub = substr("$gcstring", $i, $j); $sub = substr($gcstring->as_string, $i, $j); =item * This module implements I algorithm for determining grapheme cluster boundaries. Tailoring mechanism has not been supported yet. =back =head1 VERSION Consult $VERSION variable. =head2 Incompatible Changes =over 4 =item Release 2013.10 =over 4 =item * The new() method can take non-Unicode string argument. In this case it will be decoded by iso-8859-1 (Latin 1) character set. That method of former releases would die with non-Unicode inputs. =back =back =head1 SEE ALSO [UAX #29] Mark Davis (ed.) (2009-2013). I, Revisions 15-23. L. =head1 AUTHOR Hatuka*nezumi - IKEDA Soji =head1 COPYRIGHT Copyright (C) 2009-2013 Hatuka*nezumi - IKEDA Soji. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Unicode/LineBreak.pm000066400000000000000000000126071273566223400252560ustar00rootroot00000000000000#-*- perl -*- package Unicode::LineBreak; require 5.008; ### Pragmas: use strict; use warnings; use vars qw($VERSION @EXPORT_OK @ISA $Config @Config); ### Exporting: use Exporter; our @EXPORT_OK = qw(UNICODE_VERSION SOMBOK_VERSION context); our %EXPORT_TAGS = ('all' => [@EXPORT_OK]); ### Inheritance: our @ISA = qw(Exporter); ### Other modules: use Carp qw(croak carp); use Encode qw(is_utf8); use MIME::Charset; use Unicode::GCString; ### Globals ### The package version our $VERSION = '2016.007_02'; ### Public Configuration Attributes our @Config = ( BreakIndent => 'YES', CharMax => 998, ColMax => 76, ColMin => 0, ComplexBreaking => 'YES', Context => 'NONEASTASIAN', EAWidth => undef, Format => 'SIMPLE', HangulAsAL => 'NO', LBClass => undef, LegacyCM => 'YES', Newline => "\n", Prep => undef, Sizing => 'UAX11', Urgent => undef, ViramaAsJoiner => 'YES', ); our $Config = {}; eval { require Unicode::LineBreak::Defaults; }; push @Config, (%$Config); ### Exportable constants use Unicode::LineBreak::Constants; use constant 1.01; my $package = __PACKAGE__; my @consts = grep { s/^${package}::(\w\w+)$/$1/ } keys %constant::declared; push @EXPORT_OK, @consts; push @{$EXPORT_TAGS{'all'}}, @consts; ### Load XS module require XSLoader; XSLoader::load('Unicode::LineBreak', $VERSION); ### Load dynamic constants foreach my $p ((['EA', EAWidths()], ['LB', LBClasses()])) { my $prop = shift @{$p}; my $idx = 0; foreach my $val (@{$p}) { no strict; my $const = "${prop}_${val}"; *{$const} = eval "sub { $idx }"; push @EXPORT_OK, $const; push @{$EXPORT_TAGS{'all'}}, $const; $idx++; } } ### Privates my $EASTASIAN_CHARSETS = qr{ ^BIG5 | ^CP9\d\d | ^EUC- | ^GB18030 | ^GB2312 | ^GBK | ^HZ | ^ISO-2022- | ^KS_C_5601 | ^SHIFT_JIS }ix; my $EASTASIAN_LANGUAGES = qr{ ^AIN | ^JA\b | ^JPN | ^KO\b | ^KOR | ^ZH\b | ^CHI }ix; use overload '%{}' => \&as_hashref, '${}' => \&as_scalarref, '""' => \&as_string, ; sub new { my $class = shift; my $self = __PACKAGE__->_new(); $self->config(@Config); $self->config(@_); bless $self, $class; } sub config ($@) { my $self = shift; # Get config. if (scalar @_ == 1) { my $k = shift; my $ret; if (uc $k eq uc 'CharactersMax') { return $self->_config('CharMax'); } elsif (uc $k eq uc 'ColumnsMax') { return $self->_config('ColMax'); } elsif (uc $k eq uc 'ColumnsMin') { return $self->_config('ColMin'); } elsif (uc $k eq uc 'SizingMethod') { return $self->_config('Sizing'); } elsif (uc $k eq uc 'TailorEA') { carp "$k is obsoleted. Use EAWidth"; $ret = $self->_config('EAWidth'); if (! defined $ret) { return []; } else { return [map { ($_->[0] => $_->[1]) } @{$ret}]; } } elsif (uc $k eq uc 'TailorLB') { carp "$k is obsoleted. Use LBClass"; $ret = $self->_config('LBClass'); if (! defined $ret) { return []; } else { return [map { ($_->[0] => $_->[1]) } @{$ret}]; } } elsif (uc $k eq uc 'UrgentBreaking') { return $self->_config('Urgent'); } elsif (uc $k eq uc 'UserBreaking') { carp "$k is obsoleted. Use Prep"; $ret = $self->_config('Prep'); if (! defined $ret) { return []; } else { return $ret; } } else { return $self->_config($k); } } # Set config. my @config = (); while (0 < scalar @_) { my $k = shift; my $v = shift; if (uc $k eq uc 'CharactersMax') { push @config, 'CharMax' => $v; } elsif (uc $k eq uc 'ColumnsMax') { push @config, 'ColMax' => $v; } elsif (uc $k eq uc 'ColumnsMin') { push @config, 'ColMin' => $v; } elsif (uc $k eq uc 'SizingMethod') { push @config, 'Sizing' => $v; } elsif (uc $k eq uc 'TailorLB') { carp "$k is obsoleted. Use LBClass"; push @config, 'LBClass' => undef; if (! defined $v) { ; } else { my @v = @{$v}; while (scalar(@v)) { my $k = shift @v; my $v = shift @v; push @config, 'LBClass' => [ $k => $v ]; } } } elsif (uc $k eq uc 'TailorEA') { carp "$k is obsoleted. Use EAWidth"; push @config, 'EAWidth' => undef; if (! defined $v) { ; } else { my @v = @{$v}; while (scalar(@v)) { my $k = shift @v; my $v = shift @v; push @config, 'EAWidth' => [ $k => $v ]; } } } elsif (uc $k eq uc 'UserBreaking') { carp "$k is obsoleted. Use Prep"; push @config, 'Prep' => undef; if (! defined $v) { ; } elsif (ref $v eq 'ARRAY') { push @config, map { ('Prep' => $_) } @{$v}; } else { push @config, 'Prep' => $v; } } elsif (uc $k eq uc 'UrgentBreaking') { push @config, 'Urgent' => $v; } else { push @config, $k => $v; } } $self->_config(@config) if scalar @config; } sub context (@) { my %opts = @_; my $charset; my $language; my $context; foreach my $k (keys %opts) { if (uc $k eq 'CHARSET') { if (ref $opts{$k}) { $charset = $opts{$k}->as_string; } else { $charset = MIME::Charset->new($opts{$k})->as_string; } } elsif (uc $k eq 'LANGUAGE') { $language = uc $opts{$k}; $language =~ s/_/-/; } } if ($charset and $charset =~ /$EASTASIAN_CHARSETS/) { $context = 'EASTASIAN'; } elsif ($language and $language =~ /$EASTASIAN_LANGUAGES/) { $context = 'EASTASIAN'; } else { $context = 'NONEASTASIAN'; } $context; } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Unicode/LineBreak.pod000066400000000000000000000650331273566223400254250ustar00rootroot00000000000000=encoding utf-8 =head1 NAME Unicode::LineBreak - UAX #14 Unicode Line Breaking Algorithm =head1 SYNOPSIS use Unicode::LineBreak; $lb = Unicode::LineBreak->new(); $broken = $lb->break($string); =head1 DESCRIPTION Unicode::LineBreak performs Line Breaking Algorithm described in Unicode Standard Annex #14 [UAX #14]. East_Asian_Width informative property defined by Annex #11 [UAX #11] will be concerned to determine breaking positions. =head2 Terminology Following terms are used for convenience. B is obligatory line breaking behavior defined by core rules and performed regardless of surrounding characters. B is line breaking behavior allowed by core rules and chosen by user to perform it. Arbitrary break includes B and B defined by [UAX #14]. B are characters usually no line breaks are allowed between pairs of them, except that other characters provide break oppotunities. B are characters that usually allow line breaks both before and after themselves. [UAX #14] classifies most of alphabetic to AL and most of ideographic to ID (These terms are inaccurate from the point of view by grammatology). On several scripts, breaking positions are not obvious by each characters therefore heuristic based on dictionary is used. B of a string is not always equal to the number of characters it contains: Each of characters is either B, B or nonspacing; they occupy 2, 1 or 0 columns, respectively. Several characters may be both wide and narrow by the contexts they are used. Characters may have more various widths by customization. =head1 PUBLIC INTERFACE =head2 Line Breaking =over 4 =item new ([KEY => VALUE, ...]) I. About KEY => VALUE pairs see L. =item break (STRING) I. Break Unicode string STRING and returns it. In array context, returns array of lines contained in the result. =item break_partial (STRING) I. Same as break() but accepts incremental inputs. Give C as STRING argument to specify that input was completed. =item config (KEY) =item config (KEY => VALUE, ...) I. Get or update configuration. About KEY => VALUE pairs see L. =item copy I. Create a copy of object instance. =begin comment =item reset I. =end comment =back =head2 Getting Informations =over 4 =item breakingRule (BEFORESTR, AFTERSTR) I. Get possible line breaking behavior between strings BEFORESTR and AFTERSTR. See L for returned value. B: This method gives just approximate description of line breaking behavior. Use break() and so on to wrap actual texts. =item context ([Charset => CHARSET], [Language => LANGUAGE]) I. Get language/region context used by character set CHARSET or language LANGUAGE. =back =begin comment =head3 Methods Planned to be Deprecated =over 4 =item lbrule (BEFORE, AFTER) I. Get possible line breaking behavior between class BEFORE and class AFTER. See L for returned value. B: This method gives just approximate description of line breaking behavior. Use break() and so on to wrap actual texts. B: Use breakingRule(). =item strsize (LEN, PRE, SPC, STR) I. Calculate I of Unicode string PRE.SPC.STR based on character widths defined by [UAX #11]. B: Use L. =back =end comment =head2 Options L and L methods accept following pairs. Some of them affect number of columns ([B]), grapheme cluster segmentation ([B]) (see also L) or line breaking behavior ([B]). =over 4 =item BreakIndent => C<"YES"> | C<"NO"> [B] Always allows break after SPACEs at beginning of line, a.k.a. indent. [UAX #14] does not take account of such usage of SPACE. Default is C<"YES">. B: This option was introduced at release 1.011. =item CharMax => NUMBER [B] Possible maximum number of characters in one line, not counting trailing SPACEs and newline sequence. Note that number of characters generally doesn't represent length of line. Default is C<998>. C<0> means unlimited (as of release 2012.01). =item ColMin => NUMBER [B] Minimum number of columns which line broken arbitrarily may include, not counting trailing spaces and newline sequences. Default is C<0>. =item ColMax => NUMBER [B] Maximum number of columns line may include not counting trailing spaces and newline sequence. In other words, maximum length of line. Default is C<76>. =back See also L option and L. =over 4 =item ComplexBreaking => C<"YES"> | C<"NO"> [B] Performs heuristic breaking on South East Asian complex context. Default is, if word segmentation for South East Asian writing systems is enabled, C<"YES">. =item Context => CONTEXT [B][B] Specify language/region context. Currently available contexts are C<"EASTASIAN"> and C<"NONEASTASIAN">. Default context is C<"NONEASTASIAN">. In C<"EASTASIAN"> context, characters with East_Asian_Width property ambiguous (A) are treated as "wide" and with Line Breaking Class AI as ideographic (ID). In C<"NONEASTASIAN"> context, characters with East_Asian_Width property ambiguous (A) are treated as "narrow" and with Line Breaking Class AI as alphabetic (AL). =item EAWidth => C<[> ORD C<=E> PROPERTY C<]> =item EAWidth => C [B] Tailor classification of East_Asian_Width property. ORD is UCS scalar value of character or array reference of them. PROPERTY is one of East_Asian_Width property values and extended values (See L). This option may be specified multiple times. If C is specified, all tailoring assigned before will be canceled. By default, no tailorings are available. See also L. =item Format => METHOD [B] Specify the method to format broken lines. =over 4 =item C<"SIMPLE"> Default method. Just only insert newline at arbitrary breaking positions. =item C<"NEWLINE"> Insert or replace newline sequences with that specified by L option, remove SPACEs leading newline sequences or end-of-text. Then append newline at end of text if it does not exist. =item C<"TRIM"> Insert newline at arbitrary breaking positions. Remove SPACEs leading newline sequences. =item C Do nothing, even inserting any newlines. =item Subroutine reference See L. =back =item HangulAsAL => C<"YES"> | C<"NO"> [B] Treat hangul syllables and conjoining jamos as alphabetic characters (AL). Default is C<"NO">. =item LBClass => C<[> ORD C<=E> CLASS C<]> =item LBClass => C [B][B] Tailor classification of line breaking property. ORD is UCS scalar value of character or array reference of them. CLASS is one of line breaking classes (See L). This option may be specified multiple times. If C is specified, all tailoring assigned before will be canceled. By default, no tailorings are available. See also L. =item LegacyCM => C<"YES"> | C<"NO"> [B][B] Treat combining characters lead by a SPACE as an isolated combining character (ID). As of Unicode 5.0, such use of SPACE is not recommended. Default is C<"YES">. =item Newline => STRING [B] Unicode string to be used for newline sequence. Default is C<"\n">. =item Prep => METHOD [B] Add user-defined line breaking behavior(s). This option may be specified multiple times. Following methods are available. =over 4 =item C<"NONBREAKURI"> Won't break URIs. =item C<"BREAKURI"> Break URIs according to a rule suitable for printed materials. For more details see [CMOS], sections 6.17 and 17.11. =item C<[> REGEX, SUBREF C<]> The sequences matching regular expression REGEX will be broken by subroutine referred by SUBREF. For more details see L. =item C Cancel all methods assigned before. =back =item Sizing => METHOD [B] Specify method to calculate size of string. Following options are available. =over 4 =item C<"UAX11"> Default method. Sizes are computed by columns of each characters accoring to built-in character database. =item C Number of grapheme clusters (see L) contained in the string. =item Subroutine reference See L. =back See also L, L and L options. =item Urgent => METHOD [B] Specify method to handle excessing lines. Following options are available. =over 4 =item C<"CROAK"> Print error message and die. =item C<"FORCE"> Force breaking excessing fragment. =item C Default method. Won't break excessing fragment. =item Subroutine reference See L. =back =item ViramaAsJoiner => C<"YES"> | C<"NO"> [B] Virama sign ("halant" in Hindi, "coeng" in Khmer) and its succeeding letter are not broken. Default is C<"YES">. B: This option was introduced by release 2012.001_29. On previous releases, it was fixed to C<"NO">. "Default" grapheme cluster defined by [UAX #29] does not include this feature. =back =begin comment =head3 Obsoleted Options =over 4 =item TailorEA => C<[> ORD C<=E> PROPERTY, ... C<]> Obsoleted equivalent to L. =item TailorLB => C<[> ORD C<=E> CLASS, ... C<]> Obsoleted equivalent to L. =item UserBreaking => C<[>METHOD, ...C<]> Obsoleted equivalent to L. =back =end comment =head2 Constants =over 4 =item C, C, C, C, C, C Index values to specify six East_Asian_Width property values defined by [UAX #11]: narrow (Na), neutral (N), ambiguous (A), wide (W), halfwidth (H) and fullwidth (F). =item C Index value to specify nonspacing characters. B: This "nonspacing" value is extension by this module, not a part of [UAX #11]. =begin comment C and C: Undocumented. Earlier releases had only C. C and C were added by release 2012.10. =end comment =item C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C Index values to specify 40 line breaking property values (classes) defined by [UAX #14]. B: Property value CP was introduced by Unicode 5.2.0. Property values HL and CJ were introduced by Unicode 6.1.0. Property value RI was introduced by Unicode 6.2.0. =item C, C, C, C Four values to specify line breaking behaviors: Mandatory break; Both direct break and indirect break are allowed; Indirect break is allowed but direct break is prohibited; Prohibited break. =item C Flag to determin if word segmentation for South East Asian writing systems is enabled. If this feature was enabled, a non-empty string is set. Otherwise, C is set. B: Current release supports Thai script of modern Thai language only. =item C A string to specify version of Unicode standard this module refers. =back =head1 CUSTOMIZATION =head2 Formatting Lines If you specify subroutine reference as a value of L option, it should accept three arguments: $MODIFIED = &subroutine(SELF, EVENT, STR); SELF is a Unicode::LineBreak object, EVENT is a string to determine the context that subroutine was called in, and STR is a fragment of Unicode string leading or trailing breaking position. EVENT |When Fired |Value of STR ----------------------------------------------------------------- "sot" |Beginning of text |Fragment of first line "sop" |After mandatory break|Fragment of next line "sol" |After arbitrary break|Fragment on sequel of line "" |Just before any |Complete line without trailing |breaks |SPACEs "eol" |Arbitrary break |SPACEs leading breaking position "eop" |Mandatory break |Newline and its leading SPACEs "eot" |End of text |SPACEs (and newline) at end of | |text ----------------------------------------------------------------- Subroutine should return modified text fragment or may return C to express that no modification occurred. Note that modification in the context of C<"sot">, C<"sop"> or C<"sol"> may affect decision of successive breaking positions while in the others won't. B: String arguments are actually sequences of grapheme clusters. See L. For example, following code folds lines removing trailing spaces: sub fmt { if ($_[1] =~ /^eo/) { return "\n"; } return undef; } my $lb = Unicode::LineBreak->new(Format => \&fmt); $output = $lb->break($text); =head2 User-Defined Breaking Behaviors When a line generated by arbitrary break is expected to be beyond measure of either CharMax, ColMax or ColMin, B may be performed on successive string. If you specify subroutine reference as a value of L option, it should accept two arguments: @BROKEN = &subroutine(SELF, STR); SELF is a Unicode::LineBreak object and STR is a Unicode string to be broken. Subroutine should return an array of broken string STR. B: String argument is actually a sequence of grapheme clusters. See L. For example, following code inserts hyphen to the name of several chemical substances (such as Titin) so that it may be folded: sub hyphenize { return map {$_ =~ s/yl$/yl-/; $_} split /(\w+?yl(?=\w))/, $_[1]; } my $lb = Unicode::LineBreak->new(Urgent => \&hyphenize); $output = $lb->break("Methionylthreonylthreonylglutaminylarginyl..."); If you specify [REGEX, SUBREF] array reference as any of L option, subroutine should accept two arguments: @BROKEN = &subroutine(SELF, STR); SELF is a Unicode::LineBreak object and STR is a Unicode string matched with REGEX. Subroutine should return an array of broken string STR. For example, following code will break HTTP URLs using [CMOS] rule. my $url = qr{http://[\x21-\x7E]+}i; sub breakurl { my $self = shift; my $str = shift; return split m{(?<=[/]) (?=[^/]) | (?<=[^-.]) (?=[-~.,_?\#%=&]) | (?<=[=&]) (?=.)}x, $str; } my $lb = Unicode::LineBreak->new(Prep => [$url, \&breakurl]); $output = $lb->break($string); =head3 Preserving State Unicode::LineBreak object can behave as hash reference. Any items may be preserved throughout its life. For example, following code will separate paragraphs with empty lines. sub paraformat { my $self = shift; my $action = shift; my $str = shift; if ($action eq 'sot' or $action eq 'sop') { $self->{'line'} = ''; } elsif ($action eq '') { $self->{'line'} = $str; } elsif ($action eq 'eol') { return "\n"; } elsif ($action eq 'eop') { if (length $self->{'line'}) { return "\n\n"; } else { return "\n"; } } elsif ($action eq 'eot') { return "\n"; } return undef; } my $lb = Unicode::LineBreak->new(Format => \¶format); $output = $lb->break($string); =head2 Calculating String Size If you specify subroutine reference as a value of L option, it will be called with five arguments: $COLS = &subroutine(SELF, LEN, PRE, SPC, STR); SELF is a Unicode::LineBreak object, LEN is size of preceding string, PRE is preceding Unicode string, SPC is additional SPACEs and STR is a Unicode string to be processed. Subroutine should return calculated number of columns of C. The number of columns may not be an integer: Unit of the number may be freely chosen, however, it should be same as those of L and L option. B: String arguments are actually sequences of grapheme clusters. See L. For example, following code processes lines with tab stops by each eight columns. sub tabbedsizing { my ($self, $cols, $pre, $spc, $str) = @_; my $spcstr = $spc.$str; while ($spcstr->lbc == LB_SP) { my $c = $spcstr->item(0); if ($c eq "\t") { $cols += 8 - $cols % 8; } else { $cols += $c->columns; } $spcstr = $spcstr->substr(1); } $cols += $spcstr->columns; return $cols; }; my $lb = Unicode::LineBreak->new(LBClass => [ord("\t") => LB_SP], Sizing => \&tabbedsizing); $output = $lb->break($string); =head2 Tailoring Character Properties Character properties may be tailored by L and L options. Some constants are defined for convenience of tailoring. =head3 Line Breaking Properties =head4 Non-starters of Kana-like Characters By default, several hiragana, katakana and characters corresponding to kana are treated as non-starters (NS or CJ). When the following pair(s) are specified for value of L option, these characters are treated as normal ideographic characters (ID). =over 4 =item C LB_ID> All of characters below. =item C LB_ID> Ideographic iteration marks. U+3005 IDEOGRAPHIC ITERATION MARK, U+303B VERTICAL IDEOGRAPHIC ITERATION MARK, U+309D HIRAGANA ITERATION MARK, U+309E HIRAGANA VOICED ITERATION MARK, U+30FD KATAKANA ITERATION MARK and U+30FE KATAKANA VOICED ITERATION MARK. N.B. Some of them are neither hiragana nor katakana. =item C LB_ID> =item C LB_ID> Hiragana or katakana small letters: Hiragana small letters U+3041 A, U+3043 I, U+3045 U, U+3047 E, U+3049 O, U+3063 TU, U+3083 YA, U+3085 YU, U+3087 YO, U+308E WA, U+3095 KA, U+3096 KE. Katakana small letters U+30A1 A, U+30A3 I, U+30A5 U, U+30A7 E, U+30A9 O, U+30C3 TU, U+30E3 YA, U+30E5 YU, U+30E7 YO, U+30EE WA, U+30F5 KA, U+30F6 KE. Katakana phonetic extensions U+31F0 KU - U+31FF RO. Halfwidth katakana small letters U+FF67 A - U+FF6F TU. Hiragana or katakana prolonged sound marks: U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK and U+FF70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK. N.B. These letters are optionally treated either as non-starter or as normal ideographic. See [JIS X 4051] 6.1.1, [JLREQ] 3.1.7 or [UAX14]. N.B. U+3095, U+3096, U+30F5, U+30F6 are considered to be neither hiragana nor katakana. =item C LB_ID> U+303C MASU MARK. N.B. Although this character is not kana, it is usually regarded as abbreviation to sequence of hiragana E<0x307E> E<0x3059> or katakana E<0x30DE> E<0x30B9>, MA and SU. N.B. This character is classified as non-starter (NS) by [UAX #14] and as the class corresponding to ID by [JIS X 4051] and [JLREQ]. =back =head4 Ambiguous Quotation Marks By default, some punctuations are ambiguous quotation marks (QU). =over 4 =item C LB_OP, FORWARD_QUOTES() =E LB_CL> Some languages (Dutch, English, Italian, Portugese, Spanish, Turkish and most East Asian) use rotated-9-style punctuations (E<0x2018> E<0x201C>) as opening and 9-style punctuations (E<0x2019> E<0x201D>) as closing quotation marks. =item C LB_OP, BACKWARD_QUOTES() =E LB_CL> Some others (Czech, German and Slovak) use 9-style punctuations (E<0x2019> E<0x201D>) as opening and rotated-9-style punctuations (E<0x2018> E<0x201C>) as closing quotation marks. =item C LB_OP, FORWARD_GUILLEMETS() =E LB_CL> French, Greek, Russian etc. use left-pointing guillemets (E<0x00AB> E<0x2039>) as opening and right-pointing guillemets (E<0x00BB> E<0x203A>) as closing quotation marks. =item C LB_OP, BACKWARD_GUILLEMETS() =E LB_CL> German and Slovak use right-pointing guillemets (E<0x00BB> E<0x203A>) as opening and left-pointing guillemets (E<0x00AB> E<0x2039>) as closing quotation marks. =back Danish, Finnish, Norwegian and Swedish use 9-style or right-pointing punctuations (E<0x2019> E<0x201D> E<0x00BB> E<0x203A>) as both opening and closing quotation marks. =head4 IDEOGRAPHIC SPACE =over 4 =item C LB_BA> U+3000 IDEOGRAPHIC SPACE won't be placed at beginning of line. This is default behavior. =item C LB_ID> IDEOGRAPHIC SPACE can be placed at beginning of line. This was default behavior by Unicode 6.2 and earlier. =item C LB_SP> IDEOGRAPHIC SPACE won't be placed at beginning of line, and will protrude from end of line. =back =head3 East_Asian_Width Properties Some particular letters of Latin, Greek and Cyrillic scripts have ambiguous (A) East_Asian_Width property. Thus, these characters are treated as wide in C<"EASTASIAN"> context. Specifying C [ AMBIGUOUS_>*C<() =E EA_N ]>, those characters are always treated as narrow. =over 4 =item C EA_N> Treat all of characters below as East_Asian_Width neutral (N). =item C EA_N> =item C EA_N> =item C EA_N> Treate letters having ambiguous (A) width of Cyrillic, Greek and Latin scripts as neutral (N). =back On the other hand, despite several characters were occasionally rendered as wide characters by number of implementations for East Asian character sets, they are given narrow (Na) East_Asian_Width property just because they have fullwidth (F) compatibility characters. Specifying C as below, those characters are treated as ambiguous --- wide on C<"EASTASIAN"> context. =over 4 =item C EA_A> U+00A2 CENT SIGN, U+00A3 POUND SIGN, U+00A5 YEN SIGN (or yuan sign), U+00A6 BROKEN BAR, U+00AC NOT SIGN, U+00AF MACRON. =back =head2 Configuration File Built-in defaults of option parameters for L and L method can be overridden by configuration files: F. For more details read F. =head1 BUGS Please report bugs or buggy behaviors to developer. CPAN Request Tracker: L. =head1 VERSION Consult $VERSION variable. =head2 Incompatible Changes =over 4 =item Release 2012.06 =over 4 =item * eawidth() method was deprecated. L may be used instead. =item * lbclass() method was deprecated. Use L or L. =back =back =head2 Conformance to Standards Character properties this module is based on are defined by Unicode Standard version 8.0.0. This module is intended to implement UAX14-C2. =head1 IMPLEMENTATION NOTES =over 4 =item * Some ideographic characters may be treated either as NS or as ID by choice. =item * Hangul syllables and conjoining jamos may be treated as either ID or AL by choice. =item * Characters assigned to AI may be resolved to either AL or ID by choice. =item * Character(s) assigned to CB are not resolved. =item * Characters assigned to CJ are always resolved to NS. More flexible tailoring mechanism is provided. =item * When word segmentation for South East Asian writing systems is not supported, characters assigned to SA are resolved to AL, except that characters that have Grapheme_Cluster_Break property value Extend or SpacingMark be resolved to CM. =item * Characters assigned to SG or XX are resolved to AL. =item * Code points of following UCS ranges are given fixed property values even if they have not been assigned any characers. Ranges | UAX #14 | UAX #11 | Description ------------------------------------------------------------- U+20A0..U+20CF | PR [*1] | N [*2] | Currency symbols U+3400..U+4DBF | ID | W | CJK ideographs U+4E00..U+9FFF | ID | W | CJK ideographs U+D800..U+DFFF | AL (SG) | N | Surrogates U+E000..U+F8FF | AL (XX) | F or N (A) | Private use U+F900..U+FAFF | ID | W | CJK ideographs U+20000..U+2FFFD | ID | W | CJK ideographs U+30000..U+3FFFD | ID | W | Old hanzi U+F0000..U+FFFFD | AL (XX) | F or N (A) | Private use U+100000..U+10FFFD | AL (XX) | F or N (A) | Private use Other unassigned | AL (XX) | N | Unassigned, | | | reserved or | | | noncharacters ------------------------------------------------------------- [*1] Except U+20A7 PESETA SIGN (PO), U+20B6 LIVRE TOURNOIS SIGN (PO), U+20BB NORDIC MARK SIGN (PO) and U+20BE LARI SIGN (PO). [*2] Except U+20A9 WON SIGN (H) and U+20AC EURO SIGN (F or N (A)). =item * Characters belonging to General Category Mn, Me, Cc, Cf, Zl or Zp are treated as nonspacing by this module. =back =head1 REFERENCES =over 4 =item [CMOS] I, 15th edition. University of Chicago Press, 2003. =item [JIS X 4051] JIS X 4051:2004 I<日本語文書の組版方法> (I). Japanese Standards Association, 2004. =item [JLREQ] Anan, Yasuhiro et al. I, W3C Working Group Note 3 April 2012. L. =begin comment =item [Kubota] Kubota, Tomohiro (2001-2002). Width problems, "I". L. =end comment =item [UAX #11] A. Freytag (ed.) (2008-2009). I, Revisions 17-19. L. =item [UAX #14] A. Freytag and A. Heninger (eds.) (2008-2015). I, Revisions 22-35. L. =item [UAX #29] Mark Davis (ed.) (2009-2013). I, Revisions 15-23. L. =back =head1 SEE ALSO L, L, L. =head1 AUTHOR Copyright (C) 2009-2013 Hatuka*nezumi - IKEDA Soji . This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Unicode/LineBreak/000077500000000000000000000000001273566223400247125ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Unicode/LineBreak/Constants.pm000066400000000000000000000042651273566223400272330ustar00rootroot00000000000000#-*- perl -*- package Unicode::LineBreak; use constant { M => 4, D => 3, I => 2, P => 1,}; use constant { MANDATORY => M, DIRECT => D, INDIRECT => I, PROHIBITED => P, URGENT => 200,}; use constant { ALLOW_BEFORE => 2, PROHIBIT_BEFORE => 1, BREAK_BEFORE => 2, # deprecated. FLAGS => (2 | 1) }; use constant { AMBIGUOUS_CYRILLIC => [0x0401, 0x0410..0x044F, 0x0451, ], AMBIGUOUS_GREEK => [0x0391..0x03A9, 0x03B1..0x03C1, 0x03C3..0x03C9, ], AMBIGUOUS_LATIN => [0x00C6, 0x00D0, 0x00D8, 0x00DE, 0x00DF, 0x00E0, 0x00E1, 0x00E6, 0x00E8, 0x00E9, 0x00EA, 0x00EC, 0x00ED, 0x00F0, 0x00F2, 0x00F3, 0x00F8, 0x00F9, 0x00FA, 0x00FC, 0x00FE, 0x0101, 0x0111, 0x0113, 0x011B, 0x0126, 0x0127, 0x012B, 0x0131, 0x0132, 0x0133, 0x0138, 0x013F, 0x0140, 0x0141, 0x0142, 0x0144, 0x0148, 0x0149, 0x014A, 0x014B, 0x014D, 0x0152, 0x0153, 0x0166, 0x0167, 0x016B, 0x01CE, 0x01D0, 0x01D2, 0x01D4, 0x01D6, 0x01D8, 0x01DA, 0x01DC, 0x0251, 0x0261, ], IDEOGRAPHIC_ITERATION_MARKS => [0x3005, 0x303B, 0x309D, 0x309E, 0x30FD, 0x30FE, ], KANA_PROLONGED_SOUND_MARKS => [0x30FC, 0xFF70, ], KANA_SMALL_LETTERS => [0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087, 0x308E, 0x3095, 0x3096, 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7, 0x30EE, 0x30F5, 0x30F6, 0x31F0..0x31FF, 0xFF67..0xFF6F, ], MASU_MARK => [0x303C, ], QUESTIONABLE_NARROW_SIGNS => [0x00A2, 0x00A3, 0x00A5, 0x00A6, 0x00AC, 0x00AF, ], }; use constant { AMBIGUOUS_ALPHABETICS => [ @{AMBIGUOUS_CYRILLIC()}, @{AMBIGUOUS_GREEK()}, @{AMBIGUOUS_LATIN()}, ], KANA_NONSTARTERS => [ @{IDEOGRAPHIC_ITERATION_MARKS()}, @{KANA_PROLONGED_SOUND_MARKS()}, @{KANA_SMALL_LETTERS()}, @{MASU_MARK()}, ] }; use constant { BACKWORD_GUILLEMETS => [ 0x00AB, 0x2039, ], FORWARD_GUILLEMETS => [ 0x00BB, 0x203A, ], BACKWORD_QUOTES => [ 0x2018, 0x201C, ], FORWARD_QUOTES => [ 0x2019, 0x201D, ], }; # obsoleted names. use constant { LEFT_GUILLEMETS => BACKWORD_GUILLEMETS(), RIGHT_GUILLEMETS => FORWARD_GUILLEMETS(), LEFT_QUOTES => BACKWORD_QUOTES(), RIGHT_QUOTES => FORWARD_QUOTES(), }; use constant { IDEOGRAPHIC_SPACE => [ 0x3000, ], }; 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/lib/Unicode/LineBreak/Defaults.pm.sample000066400000000000000000000025531273566223400303040ustar00rootroot00000000000000#-*- perl -*- package Unicode::LineBreak; =head1 NAME Unicode::LineBreak::Defaults - Configuration for Unicode::LineBreak =head1 SYNOPSIS Edit this file and place it on Unicode/LineBreak/Defaults.pm to activate custom settings. =head1 DESCRIPTION Following settings are available. =over 4 =item * BreakIndent =item * CharMax =item * ColMin =item * ColMax =item * ComplexBreaking =item * Context =item * EAWidth =item * Format =item * HangulAsAL =item * LBClass =item * LegacyCM =item * Newline =item * Prep =item * Sizing =item * Urgent =item * ViramaAsJoiner =back =head1 SEE ALSO L =cut #--------------------------------------------------------------------------# # Add your own settings below. #--------------------------------------------------------------------------# ## Default settings on current release are: # $Config->{BreakIndent} = 'YES'; # $Config->{CharMax} = 998; # $Config->{ColMin} = 0; # $Config->{ColMax} = 76; # $Config->{ComplexBreaking} = 'YES'; # $Config->{Context} = 'NONEASTASIAN'; # $Config->{EAWidth} = undef; # $Config->{Format} = 'SIMPLE'; # $Config->{HangulAsAL} = 'NO'; # $Config->{LBClass} = undef; # $Config->{LegacyCM} = 'YES'; # $Config->{Newline} = "\n"; # $Config->{Prep} = undef; # $Config->{Sizing} = 'UAX11'; # $Config->{Urgent} = undef; # $Config->{ViramaAsJoiner} = 'YES'; 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/perl-Unicode-LineBreak.spec000066400000000000000000000042561273566223400257650ustar00rootroot00000000000000%define module Unicode-LineBreak %define version 2013.009_26 %define release 1%{?dist} %define sombok_version 2.3.1 %define sombok_max_version 2.99.99 Name: perl-%{module} Version: %{version} Release: %{release} License: GPL+ or Artistic Group: Development/Perl Summary(ja): UAX #14 Unicode 行分割アルゴリズム Summary: UAX #14 Unicode Line Breaking Algorithm Url: http://search.cpan.org/dist/%{module} Source: http://search.cpan.org/CPAN/authors/id/N/NE/NEZUMI/%{module}-%{version}.tar.gz Requires: perl(Encode) Requires: perl(MIME::Charset) >= 1.006.2 Requires: sombok >= %{sombok_version} Requires: sombok <= %{sombok_max_version} BuildRequires: perl(ExtUtils::MakeMaker) >= 6.26 BuildRequires: perl(MIME::Charset) >= 1.006.2 BuildRequires: perl(Test::More) #BuildRequires: perl(Test::Pod) BuildRequires: sombok-devel >= %{sombok_version} BuildRequires: sombok-devel <= %{sombok_max_version} BuildRequires: pkgconfig BuildRoot: %{_tmppath}/%{name}-%{version} AutoProv: yes AutoReq: no %description -l ja Unicode::LineBreak は、Unicode 標準の附属書14 [UAX #14] で述べる Unicode 行分割アルゴリズムを実行する。分割位置を決定する際に、附属 書11 [UAX #11] で定義される East_Asian_Width 参考特性も考慮する。 %description Unicode::LineBreak performs Line Breaking Algorithm described in Unicode Standard Annex #14 [UAX #14]. East_Asian_Width informative properties defined by Annex #11 [UAX #11] will be concerned to determine breaking positions. %prep %setup -q -n %{module}-%{version} %build %{__perl} Makefile.PL INSTALLDIRS=vendor make %check make test %install rm -rf %buildroot make install DESTDIR=%buildroot rm -f %{buildroot}%{perl_archlib}/perllocal.pod rm -f %{buildroot}%{perl_vendorarch}/auto/Unicode/LineBreak/.packlist mkdir -p %{buildroot}%{_mandir}/ja/man3 for mod in Text::LineFold Unicode::GCString Unicode::LineBreak; do mv %{buildroot}%{_mandir}/man3/POD2::JA::$mod.3pm \ %{buildroot}%{_mandir}/ja/man3/$mod.3pm done %clean rm -rf %buildroot %files %defattr(-,root,root) %doc ARTISTIC Changes* GPL README* Todo* %{_mandir}/man3/* %{_mandir}/*/man3/* %{perl_vendorarch}/* %changelog Unicode-LineBreak-Unicode-LineBreak-2016.007_02/ppport.h000066400000000000000000005254061273566223400224330ustar00rootroot00000000000000#if 0 <<'SKIP'; #endif /* ---------------------------------------------------------------------- ppport.h -- Perl/Pollution/Portability Version 3.19 Automatically created by Devel::PPPort running under perl 5.012002. Do NOT edit this file directly! -- Edit PPPort_pm.PL and the includes in parts/inc/ instead. Use 'perldoc ppport.h' to view the documentation below. ---------------------------------------------------------------------- SKIP =pod =head1 NAME ppport.h - Perl/Pollution/Portability version 3.19 =head1 SYNOPSIS perl ppport.h [options] [source files] Searches current directory for files if no [source files] are given --help show short help --version show version --patch=file write one patch file with changes --copy=suffix write changed copies with suffix --diff=program use diff program and options --compat-version=version provide compatibility with Perl version --cplusplus accept C++ comments --quiet don't output anything except fatal errors --nodiag don't show diagnostics --nohints don't show hints --nochanges don't suggest changes --nofilter don't filter input files --strip strip all script and doc functionality from ppport.h --list-provided list provided API --list-unsupported list unsupported API --api-info=name show Perl API portability information =head1 COMPATIBILITY This version of F is designed to support operation with Perl installations back to 5.003, and has been tested up to 5.10.0. =head1 OPTIONS =head2 --help Display a brief usage summary. =head2 --version Display the version of F. =head2 --patch=I If this option is given, a single patch file will be created if any changes are suggested. This requires a working diff program to be installed on your system. =head2 --copy=I If this option is given, a copy of each file will be saved with the given suffix that contains the suggested changes. This does not require any external programs. Note that this does not automagially add a dot between the original filename and the suffix. If you want the dot, you have to include it in the option argument. If neither C<--patch> or C<--copy> are given, the default is to simply print the diffs for each file. This requires either C or a C program to be installed. =head2 --diff=I Manually set the diff program and options to use. The default is to use C, when installed, and output unified context diffs. =head2 --compat-version=I Tell F to check for compatibility with the given Perl version. The default is to check for compatibility with Perl version 5.003. You can use this option to reduce the output of F if you intend to be backward compatible only down to a certain Perl version. =head2 --cplusplus Usually, F will detect C++ style comments and replace them with C style comments for portability reasons. Using this option instructs F to leave C++ comments untouched. =head2 --quiet Be quiet. Don't print anything except fatal errors. =head2 --nodiag Don't output any diagnostic messages. Only portability alerts will be printed. =head2 --nohints Don't output any hints. Hints often contain useful portability notes. Warnings will still be displayed. =head2 --nochanges Don't suggest any changes. Only give diagnostic output and hints unless these are also deactivated. =head2 --nofilter Don't filter the list of input files. By default, files not looking like source code (i.e. not *.xs, *.c, *.cc, *.cpp or *.h) are skipped. =head2 --strip Strip all script and documentation functionality from F. This reduces the size of F dramatically and may be useful if you want to include F in smaller modules without increasing their distribution size too much. The stripped F will have a C<--unstrip> option that allows you to undo the stripping, but only if an appropriate C module is installed. =head2 --list-provided Lists the API elements for which compatibility is provided by F. Also lists if it must be explicitly requested, if it has dependencies, and if there are hints or warnings for it. =head2 --list-unsupported Lists the API elements that are known not to be supported by F and below which version of Perl they probably won't be available or work. =head2 --api-info=I Show portability information for API elements matching I. If I is surrounded by slashes, it is interpreted as a regular expression. =head1 DESCRIPTION In order for a Perl extension (XS) module to be as portable as possible across differing versions of Perl itself, certain steps need to be taken. =over 4 =item * Including this header is the first major one. This alone will give you access to a large part of the Perl API that hasn't been available in earlier Perl releases. Use perl ppport.h --list-provided to see which API elements are provided by ppport.h. =item * You should avoid using deprecated parts of the API. For example, using global Perl variables without the C prefix is deprecated. Also, some API functions used to have a C prefix. Using this form is also deprecated. You can safely use the supported API, as F will provide wrappers for older Perl versions. =item * If you use one of a few functions or variables that were not present in earlier versions of Perl, and that can't be provided using a macro, you have to explicitly request support for these functions by adding one or more C<#define>s in your source code before the inclusion of F. These functions or variables will be marked C in the list shown by C<--list-provided>. Depending on whether you module has a single or multiple files that use such functions or variables, you want either C or global variants. For a C function or variable (used only in a single source file), use: #define NEED_function #define NEED_variable For a global function or variable (used in multiple source files), use: #define NEED_function_GLOBAL #define NEED_variable_GLOBAL Note that you mustn't have more than one global request for the same function or variable in your project. Function / Variable Static Request Global Request ----------------------------------------------------------------------------------------- PL_parser NEED_PL_parser NEED_PL_parser_GLOBAL PL_signals NEED_PL_signals NEED_PL_signals_GLOBAL eval_pv() NEED_eval_pv NEED_eval_pv_GLOBAL grok_bin() NEED_grok_bin NEED_grok_bin_GLOBAL grok_hex() NEED_grok_hex NEED_grok_hex_GLOBAL grok_number() NEED_grok_number NEED_grok_number_GLOBAL grok_numeric_radix() NEED_grok_numeric_radix NEED_grok_numeric_radix_GLOBAL grok_oct() NEED_grok_oct NEED_grok_oct_GLOBAL load_module() NEED_load_module NEED_load_module_GLOBAL my_snprintf() NEED_my_snprintf NEED_my_snprintf_GLOBAL my_sprintf() NEED_my_sprintf NEED_my_sprintf_GLOBAL my_strlcat() NEED_my_strlcat NEED_my_strlcat_GLOBAL my_strlcpy() NEED_my_strlcpy NEED_my_strlcpy_GLOBAL newCONSTSUB() NEED_newCONSTSUB NEED_newCONSTSUB_GLOBAL newRV_noinc() NEED_newRV_noinc NEED_newRV_noinc_GLOBAL newSV_type() NEED_newSV_type NEED_newSV_type_GLOBAL newSVpvn_flags() NEED_newSVpvn_flags NEED_newSVpvn_flags_GLOBAL newSVpvn_share() NEED_newSVpvn_share NEED_newSVpvn_share_GLOBAL pv_display() NEED_pv_display NEED_pv_display_GLOBAL pv_escape() NEED_pv_escape NEED_pv_escape_GLOBAL pv_pretty() NEED_pv_pretty NEED_pv_pretty_GLOBAL sv_2pv_flags() NEED_sv_2pv_flags NEED_sv_2pv_flags_GLOBAL sv_2pvbyte() NEED_sv_2pvbyte NEED_sv_2pvbyte_GLOBAL sv_catpvf_mg() NEED_sv_catpvf_mg NEED_sv_catpvf_mg_GLOBAL sv_catpvf_mg_nocontext() NEED_sv_catpvf_mg_nocontext NEED_sv_catpvf_mg_nocontext_GLOBAL sv_pvn_force_flags() NEED_sv_pvn_force_flags NEED_sv_pvn_force_flags_GLOBAL sv_setpvf_mg() NEED_sv_setpvf_mg NEED_sv_setpvf_mg_GLOBAL sv_setpvf_mg_nocontext() NEED_sv_setpvf_mg_nocontext NEED_sv_setpvf_mg_nocontext_GLOBAL vload_module() NEED_vload_module NEED_vload_module_GLOBAL vnewSVpvf() NEED_vnewSVpvf NEED_vnewSVpvf_GLOBAL warner() NEED_warner NEED_warner_GLOBAL To avoid namespace conflicts, you can change the namespace of the explicitly exported functions / variables using the C macro. Just C<#define> the macro before including C: #define DPPP_NAMESPACE MyOwnNamespace_ #include "ppport.h" The default namespace is C. =back The good thing is that most of the above can be checked by running F on your source code. See the next section for details. =head1 EXAMPLES To verify whether F is needed for your module, whether you should make any changes to your code, and whether any special defines should be used, F can be run as a Perl script to check your source code. Simply say: perl ppport.h The result will usually be a list of patches suggesting changes that should at least be acceptable, if not necessarily the most efficient solution, or a fix for all possible problems. If you know that your XS module uses features only available in newer Perl releases, if you're aware that it uses C++ comments, and if you want all suggestions as a single patch file, you could use something like this: perl ppport.h --compat-version=5.6.0 --cplusplus --patch=test.diff If you only want your code to be scanned without any suggestions for changes, use: perl ppport.h --nochanges You can specify a different C program or options, using the C<--diff> option: perl ppport.h --diff='diff -C 10' This would output context diffs with 10 lines of context. If you want to create patched copies of your files instead, use: perl ppport.h --copy=.new To display portability information for the C function, use: perl ppport.h --api-info=newSVpvn Since the argument to C<--api-info> can be a regular expression, you can use perl ppport.h --api-info=/_nomg$/ to display portability information for all C<_nomg> functions or perl ppport.h --api-info=/./ to display information for all known API elements. =head1 BUGS If this version of F is causing failure during the compilation of this module, please check if newer versions of either this module or C are available on CPAN before sending a bug report. If F was generated using the latest version of C and is causing failure of this module, please file a bug report using the CPAN Request Tracker at L. Please include the following information: =over 4 =item 1. The complete output from running "perl -V" =item 2. This file. =item 3. The name and version of the module you were trying to build. =item 4. A full log of the build that failed. =item 5. Any other information that you think could be relevant. =back For the latest version of this code, please get the C module from CPAN. =head1 COPYRIGHT Version 3.x, Copyright (c) 2004-2009, Marcus Holland-Moritz. Version 2.x, Copyright (C) 2001, Paul Marquess. Version 1.x, Copyright (C) 1999, Kenneth Albanowski. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO See L. =cut use strict; # Disable broken TRIE-optimization BEGIN { eval '${^RE_TRIE_MAXBUF} = -1' if $] >= 5.009004 && $] <= 5.009005 } my $VERSION = 3.19; my %opt = ( quiet => 0, diag => 1, hints => 1, changes => 1, cplusplus => 0, filter => 1, strip => 0, version => 0, ); my($ppport) = $0 =~ /([\w.]+)$/; my $LF = '(?:\r\n|[\r\n])'; # line feed my $HS = "[ \t]"; # horizontal whitespace # Never use C comments in this file! my $ccs = '/'.'*'; my $cce = '*'.'/'; my $rccs = quotemeta $ccs; my $rcce = quotemeta $cce; eval { require Getopt::Long; Getopt::Long::GetOptions(\%opt, qw( help quiet diag! filter! hints! changes! cplusplus strip version patch=s copy=s diff=s compat-version=s list-provided list-unsupported api-info=s )) or usage(); }; if ($@ and grep /^-/, @ARGV) { usage() if "@ARGV" =~ /^--?h(?:elp)?$/; die "Getopt::Long not found. Please don't use any options.\n"; } if ($opt{version}) { print "This is $0 $VERSION.\n"; exit 0; } usage() if $opt{help}; strip() if $opt{strip}; if (exists $opt{'compat-version'}) { my($r,$v,$s) = eval { parse_version($opt{'compat-version'}) }; if ($@) { die "Invalid version number format: '$opt{'compat-version'}'\n"; } die "Only Perl 5 is supported\n" if $r != 5; die "Invalid version number: $opt{'compat-version'}\n" if $v >= 1000 || $s >= 1000; $opt{'compat-version'} = sprintf "%d.%03d%03d", $r, $v, $s; } else { $opt{'compat-version'} = 5; } my %API = map { /^(\w+)\|([^|]*)\|([^|]*)\|(\w*)$/ ? ( $1 => { ($2 ? ( base => $2 ) : ()), ($3 ? ( todo => $3 ) : ()), (index($4, 'v') >= 0 ? ( varargs => 1 ) : ()), (index($4, 'p') >= 0 ? ( provided => 1 ) : ()), (index($4, 'n') >= 0 ? ( nothxarg => 1 ) : ()), } ) : die "invalid spec: $_" } qw( AvFILLp|5.004050||p AvFILL||| CLASS|||n CPERLscope|5.005000||p CX_CURPAD_SAVE||| CX_CURPAD_SV||| CopFILEAV|5.006000||p CopFILEGV_set|5.006000||p CopFILEGV|5.006000||p CopFILESV|5.006000||p CopFILE_set|5.006000||p CopFILE|5.006000||p CopSTASHPV_set|5.006000||p CopSTASHPV|5.006000||p CopSTASH_eq|5.006000||p CopSTASH_set|5.006000||p CopSTASH|5.006000||p CopyD|5.009002||p Copy||| CvPADLIST||| CvSTASH||| CvWEAKOUTSIDE||| DEFSV_set|5.011000||p DEFSV|5.004050||p END_EXTERN_C|5.005000||p ENTER||| ERRSV|5.004050||p EXTEND||| EXTERN_C|5.005000||p F0convert|||n FREETMPS||| GIMME_V||5.004000|n GIMME|||n GROK_NUMERIC_RADIX|5.007002||p G_ARRAY||| G_DISCARD||| G_EVAL||| G_METHOD|5.006001||p G_NOARGS||| G_SCALAR||| G_VOID||5.004000| GetVars||| GvSVn|5.009003||p GvSV||| Gv_AMupdate||| HEf_SVKEY||5.004000| HeHASH||5.004000| HeKEY||5.004000| HeKLEN||5.004000| HePV||5.004000| HeSVKEY_force||5.004000| HeSVKEY_set||5.004000| HeSVKEY||5.004000| HeUTF8||5.011000| HeVAL||5.004000| HvNAMELEN_get|5.009003||p HvNAME_get|5.009003||p HvNAME||| INT2PTR|5.006000||p IN_LOCALE_COMPILETIME|5.007002||p IN_LOCALE_RUNTIME|5.007002||p IN_LOCALE|5.007002||p IN_PERL_COMPILETIME|5.008001||p IS_NUMBER_GREATER_THAN_UV_MAX|5.007002||p IS_NUMBER_INFINITY|5.007002||p IS_NUMBER_IN_UV|5.007002||p IS_NUMBER_NAN|5.007003||p IS_NUMBER_NEG|5.007002||p IS_NUMBER_NOT_INT|5.007002||p IVSIZE|5.006000||p IVTYPE|5.006000||p IVdf|5.006000||p LEAVE||| LVRET||| MARK||| MULTICALL||5.011000| MY_CXT_CLONE|5.009002||p MY_CXT_INIT|5.007003||p MY_CXT|5.007003||p MoveD|5.009002||p Move||| NOOP|5.005000||p NUM2PTR|5.006000||p NVTYPE|5.006000||p NVef|5.006001||p NVff|5.006001||p NVgf|5.006001||p Newxc|5.009003||p Newxz|5.009003||p Newx|5.009003||p Nullav||| Nullch||| Nullcv||| Nullhv||| Nullsv||| ORIGMARK||| PAD_BASE_SV||| PAD_CLONE_VARS||| PAD_COMPNAME_FLAGS||| PAD_COMPNAME_GEN_set||| PAD_COMPNAME_GEN||| PAD_COMPNAME_OURSTASH||| PAD_COMPNAME_PV||| PAD_COMPNAME_TYPE||| PAD_DUP||| PAD_RESTORE_LOCAL||| PAD_SAVE_LOCAL||| PAD_SAVE_SETNULLPAD||| PAD_SETSV||| PAD_SET_CUR_NOSAVE||| PAD_SET_CUR||| PAD_SVl||| PAD_SV||| PERLIO_FUNCS_CAST|5.009003||p PERLIO_FUNCS_DECL|5.009003||p PERL_ABS|5.008001||p PERL_BCDVERSION|5.011000||p PERL_GCC_BRACE_GROUPS_FORBIDDEN|5.008001||p PERL_HASH|5.004000||p PERL_INT_MAX|5.004000||p PERL_INT_MIN|5.004000||p PERL_LONG_MAX|5.004000||p PERL_LONG_MIN|5.004000||p PERL_MAGIC_arylen|5.007002||p PERL_MAGIC_backref|5.007002||p PERL_MAGIC_bm|5.007002||p PERL_MAGIC_collxfrm|5.007002||p PERL_MAGIC_dbfile|5.007002||p PERL_MAGIC_dbline|5.007002||p PERL_MAGIC_defelem|5.007002||p PERL_MAGIC_envelem|5.007002||p PERL_MAGIC_env|5.007002||p PERL_MAGIC_ext|5.007002||p PERL_MAGIC_fm|5.007002||p PERL_MAGIC_glob|5.011000||p PERL_MAGIC_isaelem|5.007002||p PERL_MAGIC_isa|5.007002||p PERL_MAGIC_mutex|5.011000||p PERL_MAGIC_nkeys|5.007002||p PERL_MAGIC_overload_elem|5.007002||p PERL_MAGIC_overload_table|5.007002||p PERL_MAGIC_overload|5.007002||p PERL_MAGIC_pos|5.007002||p PERL_MAGIC_qr|5.007002||p PERL_MAGIC_regdata|5.007002||p PERL_MAGIC_regdatum|5.007002||p PERL_MAGIC_regex_global|5.007002||p PERL_MAGIC_shared_scalar|5.007003||p PERL_MAGIC_shared|5.007003||p PERL_MAGIC_sigelem|5.007002||p PERL_MAGIC_sig|5.007002||p PERL_MAGIC_substr|5.007002||p PERL_MAGIC_sv|5.007002||p PERL_MAGIC_taint|5.007002||p PERL_MAGIC_tiedelem|5.007002||p PERL_MAGIC_tiedscalar|5.007002||p PERL_MAGIC_tied|5.007002||p PERL_MAGIC_utf8|5.008001||p PERL_MAGIC_uvar_elem|5.007003||p PERL_MAGIC_uvar|5.007002||p PERL_MAGIC_vec|5.007002||p PERL_MAGIC_vstring|5.008001||p PERL_PV_ESCAPE_ALL|5.009004||p PERL_PV_ESCAPE_FIRSTCHAR|5.009004||p PERL_PV_ESCAPE_NOBACKSLASH|5.009004||p PERL_PV_ESCAPE_NOCLEAR|5.009004||p PERL_PV_ESCAPE_QUOTE|5.009004||p PERL_PV_ESCAPE_RE|5.009005||p PERL_PV_ESCAPE_UNI_DETECT|5.009004||p PERL_PV_ESCAPE_UNI|5.009004||p PERL_PV_PRETTY_DUMP|5.009004||p PERL_PV_PRETTY_ELLIPSES|5.010000||p PERL_PV_PRETTY_LTGT|5.009004||p PERL_PV_PRETTY_NOCLEAR|5.010000||p PERL_PV_PRETTY_QUOTE|5.009004||p PERL_PV_PRETTY_REGPROP|5.009004||p PERL_QUAD_MAX|5.004000||p PERL_QUAD_MIN|5.004000||p PERL_REVISION|5.006000||p PERL_SCAN_ALLOW_UNDERSCORES|5.007003||p PERL_SCAN_DISALLOW_PREFIX|5.007003||p PERL_SCAN_GREATER_THAN_UV_MAX|5.007003||p PERL_SCAN_SILENT_ILLDIGIT|5.008001||p PERL_SHORT_MAX|5.004000||p PERL_SHORT_MIN|5.004000||p PERL_SIGNALS_UNSAFE_FLAG|5.008001||p PERL_SUBVERSION|5.006000||p PERL_SYS_INIT3||5.006000| PERL_SYS_INIT||| PERL_SYS_TERM||5.011000| PERL_UCHAR_MAX|5.004000||p PERL_UCHAR_MIN|5.004000||p PERL_UINT_MAX|5.004000||p PERL_UINT_MIN|5.004000||p PERL_ULONG_MAX|5.004000||p PERL_ULONG_MIN|5.004000||p PERL_UNUSED_ARG|5.009003||p PERL_UNUSED_CONTEXT|5.009004||p PERL_UNUSED_DECL|5.007002||p PERL_UNUSED_VAR|5.007002||p PERL_UQUAD_MAX|5.004000||p PERL_UQUAD_MIN|5.004000||p PERL_USE_GCC_BRACE_GROUPS|5.009004||p PERL_USHORT_MAX|5.004000||p PERL_USHORT_MIN|5.004000||p PERL_VERSION|5.006000||p PL_DBsignal|5.005000||p PL_DBsingle|||pn PL_DBsub|||pn PL_DBtrace|||pn PL_Sv|5.005000||p PL_bufend|5.011000||p PL_bufptr|5.011000||p PL_compiling|5.004050||p PL_copline|5.011000||p PL_curcop|5.004050||p PL_curstash|5.004050||p PL_debstash|5.004050||p PL_defgv|5.004050||p PL_diehook|5.004050||p PL_dirty|5.004050||p PL_dowarn|||pn PL_errgv|5.004050||p PL_error_count|5.011000||p PL_expect|5.011000||p PL_hexdigit|5.005000||p PL_hints|5.005000||p PL_in_my_stash|5.011000||p PL_in_my|5.011000||p PL_last_in_gv|||n PL_laststatval|5.005000||p PL_lex_state|5.011000||p PL_lex_stuff|5.011000||p PL_linestr|5.011000||p PL_modglobal||5.005000|n PL_na|5.004050||pn PL_no_modify|5.006000||p PL_ofsgv|||n PL_parser|5.009005||p PL_perl_destruct_level|5.004050||p PL_perldb|5.004050||p PL_ppaddr|5.006000||p PL_rsfp_filters|5.004050||p PL_rsfp|5.004050||p PL_rs|||n PL_signals|5.008001||p PL_stack_base|5.004050||p PL_stack_sp|5.004050||p PL_statcache|5.005000||p PL_stdingv|5.004050||p PL_sv_arenaroot|5.004050||p PL_sv_no|5.004050||pn PL_sv_undef|5.004050||pn PL_sv_yes|5.004050||pn PL_tainted|5.004050||p PL_tainting|5.004050||p PL_tokenbuf|5.011000||p POP_MULTICALL||5.011000| POPi|||n POPl|||n POPn|||n POPpbytex||5.007001|n POPpx||5.005030|n POPp|||n POPs|||n PTR2IV|5.006000||p PTR2NV|5.006000||p PTR2UV|5.006000||p PTR2nat|5.009003||p PTR2ul|5.007001||p PTRV|5.006000||p PUSHMARK||| PUSH_MULTICALL||5.011000| PUSHi||| PUSHmortal|5.009002||p PUSHn||| PUSHp||| PUSHs||| PUSHu|5.004000||p PUTBACK||| PerlIO_clearerr||5.007003| PerlIO_close||5.007003| PerlIO_context_layers||5.009004| PerlIO_eof||5.007003| PerlIO_error||5.007003| PerlIO_fileno||5.007003| PerlIO_fill||5.007003| PerlIO_flush||5.007003| PerlIO_get_base||5.007003| PerlIO_get_bufsiz||5.007003| PerlIO_get_cnt||5.007003| PerlIO_get_ptr||5.007003| PerlIO_read||5.007003| PerlIO_seek||5.007003| PerlIO_set_cnt||5.007003| PerlIO_set_ptrcnt||5.007003| PerlIO_setlinebuf||5.007003| PerlIO_stderr||5.007003| PerlIO_stdin||5.007003| PerlIO_stdout||5.007003| PerlIO_tell||5.007003| PerlIO_unread||5.007003| PerlIO_write||5.007003| Perl_signbit||5.009005|n PoisonFree|5.009004||p PoisonNew|5.009004||p PoisonWith|5.009004||p Poison|5.008000||p RETVAL|||n Renewc||| Renew||| SAVECLEARSV||| SAVECOMPPAD||| SAVEPADSV||| SAVETMPS||| SAVE_DEFSV|5.004050||p SPAGAIN||| SP||| START_EXTERN_C|5.005000||p START_MY_CXT|5.007003||p STMT_END|||p STMT_START|||p STR_WITH_LEN|5.009003||p ST||| SV_CONST_RETURN|5.009003||p SV_COW_DROP_PV|5.008001||p SV_COW_SHARED_HASH_KEYS|5.009005||p SV_GMAGIC|5.007002||p SV_HAS_TRAILING_NUL|5.009004||p SV_IMMEDIATE_UNREF|5.007001||p SV_MUTABLE_RETURN|5.009003||p SV_NOSTEAL|5.009002||p SV_SMAGIC|5.009003||p SV_UTF8_NO_ENCODING|5.008001||p SVfARG|5.009005||p SVf_UTF8|5.006000||p SVf|5.006000||p SVt_IV||| SVt_NV||| SVt_PVAV||| SVt_PVCV||| SVt_PVHV||| SVt_PVMG||| SVt_PV||| Safefree||| Slab_Alloc||| Slab_Free||| Slab_to_rw||| StructCopy||| SvCUR_set||| SvCUR||| SvEND||| SvGAMAGIC||5.006001| SvGETMAGIC|5.004050||p SvGROW||| SvIOK_UV||5.006000| SvIOK_notUV||5.006000| SvIOK_off||| SvIOK_only_UV||5.006000| SvIOK_only||| SvIOK_on||| SvIOKp||| SvIOK||| SvIVX||| SvIV_nomg|5.009001||p SvIV_set||| SvIVx||| SvIV||| SvIsCOW_shared_hash||5.008003| SvIsCOW||5.008003| SvLEN_set||| SvLEN||| SvLOCK||5.007003| SvMAGIC_set|5.009003||p SvNIOK_off||| SvNIOKp||| SvNIOK||| SvNOK_off||| SvNOK_only||| SvNOK_on||| SvNOKp||| SvNOK||| SvNVX||| SvNV_set||| SvNVx||| SvNV||| SvOK||| SvOOK_offset||5.011000| SvOOK||| SvPOK_off||| SvPOK_only_UTF8||5.006000| SvPOK_only||| SvPOK_on||| SvPOKp||| SvPOK||| SvPVX_const|5.009003||p SvPVX_mutable|5.009003||p SvPVX||| SvPV_const|5.009003||p SvPV_flags_const_nolen|5.009003||p SvPV_flags_const|5.009003||p SvPV_flags_mutable|5.009003||p SvPV_flags|5.007002||p SvPV_force_flags_mutable|5.009003||p SvPV_force_flags_nolen|5.009003||p SvPV_force_flags|5.007002||p SvPV_force_mutable|5.009003||p SvPV_force_nolen|5.009003||p SvPV_force_nomg_nolen|5.009003||p SvPV_force_nomg|5.007002||p SvPV_force|||p SvPV_mutable|5.009003||p SvPV_nolen_const|5.009003||p SvPV_nolen|5.006000||p SvPV_nomg_const_nolen|5.009003||p SvPV_nomg_const|5.009003||p SvPV_nomg|5.007002||p SvPV_renew|5.009003||p SvPV_set||| SvPVbyte_force||5.009002| SvPVbyte_nolen||5.006000| SvPVbytex_force||5.006000| SvPVbytex||5.006000| SvPVbyte|5.006000||p SvPVutf8_force||5.006000| SvPVutf8_nolen||5.006000| SvPVutf8x_force||5.006000| SvPVutf8x||5.006000| SvPVutf8||5.006000| SvPVx||| SvPV||| SvREFCNT_dec||| SvREFCNT_inc_NN|5.009004||p SvREFCNT_inc_simple_NN|5.009004||p SvREFCNT_inc_simple_void_NN|5.009004||p SvREFCNT_inc_simple_void|5.009004||p SvREFCNT_inc_simple|5.009004||p SvREFCNT_inc_void_NN|5.009004||p SvREFCNT_inc_void|5.009004||p SvREFCNT_inc|||p SvREFCNT||| SvROK_off||| SvROK_on||| SvROK||| SvRV_set|5.009003||p SvRV||| SvRXOK||5.009005| SvRX||5.009005| SvSETMAGIC||| SvSHARED_HASH|5.009003||p SvSHARE||5.007003| SvSTASH_set|5.009003||p SvSTASH||| SvSetMagicSV_nosteal||5.004000| SvSetMagicSV||5.004000| SvSetSV_nosteal||5.004000| SvSetSV||| SvTAINTED_off||5.004000| SvTAINTED_on||5.004000| SvTAINTED||5.004000| SvTAINT||| SvTRUE||| SvTYPE||| SvUNLOCK||5.007003| SvUOK|5.007001|5.006000|p SvUPGRADE||| SvUTF8_off||5.006000| SvUTF8_on||5.006000| SvUTF8||5.006000| SvUVXx|5.004000||p SvUVX|5.004000||p SvUV_nomg|5.009001||p SvUV_set|5.009003||p SvUVx|5.004000||p SvUV|5.004000||p SvVOK||5.008001| SvVSTRING_mg|5.009004||p THIS|||n UNDERBAR|5.009002||p UTF8_MAXBYTES|5.009002||p UVSIZE|5.006000||p UVTYPE|5.006000||p UVXf|5.007001||p UVof|5.006000||p UVuf|5.006000||p UVxf|5.006000||p WARN_ALL|5.006000||p WARN_AMBIGUOUS|5.006000||p WARN_ASSERTIONS|5.011000||p WARN_BAREWORD|5.006000||p WARN_CLOSED|5.006000||p WARN_CLOSURE|5.006000||p WARN_DEBUGGING|5.006000||p WARN_DEPRECATED|5.006000||p WARN_DIGIT|5.006000||p WARN_EXEC|5.006000||p WARN_EXITING|5.006000||p WARN_GLOB|5.006000||p WARN_INPLACE|5.006000||p WARN_INTERNAL|5.006000||p WARN_IO|5.006000||p WARN_LAYER|5.008000||p WARN_MALLOC|5.006000||p WARN_MISC|5.006000||p WARN_NEWLINE|5.006000||p WARN_NUMERIC|5.006000||p WARN_ONCE|5.006000||p WARN_OVERFLOW|5.006000||p WARN_PACK|5.006000||p WARN_PARENTHESIS|5.006000||p WARN_PIPE|5.006000||p WARN_PORTABLE|5.006000||p WARN_PRECEDENCE|5.006000||p WARN_PRINTF|5.006000||p WARN_PROTOTYPE|5.006000||p WARN_QW|5.006000||p WARN_RECURSION|5.006000||p WARN_REDEFINE|5.006000||p WARN_REGEXP|5.006000||p WARN_RESERVED|5.006000||p WARN_SEMICOLON|5.006000||p WARN_SEVERE|5.006000||p WARN_SIGNAL|5.006000||p WARN_SUBSTR|5.006000||p WARN_SYNTAX|5.006000||p WARN_TAINT|5.006000||p WARN_THREADS|5.008000||p WARN_UNINITIALIZED|5.006000||p WARN_UNOPENED|5.006000||p WARN_UNPACK|5.006000||p WARN_UNTIE|5.006000||p WARN_UTF8|5.006000||p WARN_VOID|5.006000||p XCPT_CATCH|5.009002||p XCPT_RETHROW|5.009002||p XCPT_TRY_END|5.009002||p XCPT_TRY_START|5.009002||p XPUSHi||| XPUSHmortal|5.009002||p XPUSHn||| XPUSHp||| XPUSHs||| XPUSHu|5.004000||p XSPROTO|5.010000||p XSRETURN_EMPTY||| XSRETURN_IV||| XSRETURN_NO||| XSRETURN_NV||| XSRETURN_PV||| XSRETURN_UNDEF||| XSRETURN_UV|5.008001||p XSRETURN_YES||| XSRETURN|||p XST_mIV||| XST_mNO||| XST_mNV||| XST_mPV||| XST_mUNDEF||| XST_mUV|5.008001||p XST_mYES||| XS_VERSION_BOOTCHECK||| XS_VERSION||| XSprePUSH|5.006000||p XS||| ZeroD|5.009002||p Zero||| _aMY_CXT|5.007003||p _pMY_CXT|5.007003||p aMY_CXT_|5.007003||p aMY_CXT|5.007003||p aTHXR_|5.011000||p aTHXR|5.011000||p aTHX_|5.006000||p aTHX|5.006000||p add_data|||n addmad||| allocmy||| amagic_call||| amagic_cmp_locale||| amagic_cmp||| amagic_i_ncmp||| amagic_ncmp||| any_dup||| ao||| append_elem||| append_list||| append_madprops||| apply_attrs_my||| apply_attrs_string||5.006001| apply_attrs||| apply||| atfork_lock||5.007003|n atfork_unlock||5.007003|n av_arylen_p||5.009003| av_clear||| av_create_and_push||5.009005| av_create_and_unshift_one||5.009005| av_delete||5.006000| av_exists||5.006000| av_extend||| av_fetch||| av_fill||| av_iter_p||5.011000| av_len||| av_make||| av_pop||| av_push||| av_reify||| av_shift||| av_store||| av_undef||| av_unshift||| ax|||n bad_type||| bind_match||| block_end||| block_gimme||5.004000| block_start||| boolSV|5.004000||p boot_core_PerlIO||| boot_core_UNIVERSAL||| boot_core_mro||| bytes_from_utf8||5.007001| bytes_to_uni|||n bytes_to_utf8||5.006001| call_argv|5.006000||p call_atexit||5.006000| call_list||5.004000| call_method|5.006000||p call_pv|5.006000||p call_sv|5.006000||p calloc||5.007002|n cando||| cast_i32||5.006000| cast_iv||5.006000| cast_ulong||5.006000| cast_uv||5.006000| check_type_and_open||| check_uni||| checkcomma||| checkposixcc||| ckWARN|5.006000||p ck_anoncode||| ck_bitop||| ck_concat||| ck_defined||| ck_delete||| ck_die||| ck_each||| ck_eof||| ck_eval||| ck_exec||| ck_exists||| ck_exit||| ck_ftst||| ck_fun||| ck_glob||| ck_grep||| ck_index||| ck_join||| ck_lfun||| ck_listiob||| ck_match||| ck_method||| ck_null||| ck_open||| ck_readline||| ck_repeat||| ck_require||| ck_return||| ck_rfun||| ck_rvconst||| ck_sassign||| ck_select||| ck_shift||| ck_sort||| ck_spair||| ck_split||| ck_subr||| ck_substr||| ck_svconst||| ck_trunc||| ck_unpack||| ckwarn_d||5.009003| ckwarn||5.009003| cl_and|||n cl_anything|||n cl_init_zero|||n cl_init|||n cl_is_anything|||n cl_or|||n clear_placeholders||| closest_cop||| convert||| cop_free||| cr_textfilter||| create_eval_scope||| croak_nocontext|||vn croak_xs_usage||5.011000| croak|||v csighandler||5.009003|n curmad||| custom_op_desc||5.007003| custom_op_name||5.007003| cv_ckproto_len||| cv_clone||| cv_const_sv||5.004000| cv_dump||| cv_undef||| cx_dump||5.005000| cx_dup||| cxinc||| dAXMARK|5.009003||p dAX|5.007002||p dITEMS|5.007002||p dMARK||| dMULTICALL||5.009003| dMY_CXT_SV|5.007003||p dMY_CXT|5.007003||p dNOOP|5.006000||p dORIGMARK||| dSP||| dTHR|5.004050||p dTHXR|5.011000||p dTHXa|5.006000||p dTHXoa|5.006000||p dTHX|5.006000||p dUNDERBAR|5.009002||p dVAR|5.009003||p dXCPT|5.009002||p dXSARGS||| dXSI32||| dXSTARG|5.006000||p deb_curcv||| deb_nocontext|||vn deb_stack_all||| deb_stack_n||| debop||5.005000| debprofdump||5.005000| debprof||| debstackptrs||5.007003| debstack||5.007003| debug_start_match||| deb||5.007003|v del_sv||| delete_eval_scope||| delimcpy||5.004000| deprecate_old||| deprecate||| despatch_signals||5.007001| destroy_matcher||| die_nocontext|||vn die_where||| die|||v dirp_dup||| div128||| djSP||| do_aexec5||| do_aexec||| do_aspawn||| do_binmode||5.004050| do_chomp||| do_chop||| do_close||| do_dump_pad||| do_eof||| do_exec3||| do_execfree||| do_exec||| do_gv_dump||5.006000| do_gvgv_dump||5.006000| do_hv_dump||5.006000| do_ipcctl||| do_ipcget||| do_join||| do_kv||| do_magic_dump||5.006000| do_msgrcv||| do_msgsnd||| do_oddball||| do_op_dump||5.006000| do_op_xmldump||| do_open9||5.006000| do_openn||5.007001| do_open||5.004000| do_pmop_dump||5.006000| do_pmop_xmldump||| do_print||| do_readline||| do_seek||| do_semop||| do_shmio||| do_smartmatch||| do_spawn_nowait||| do_spawn||| do_sprintf||| do_sv_dump||5.006000| do_sysseek||| do_tell||| do_trans_complex_utf8||| do_trans_complex||| do_trans_count_utf8||| do_trans_count||| do_trans_simple_utf8||| do_trans_simple||| do_trans||| do_vecget||| do_vecset||| do_vop||| docatch||| doeval||| dofile||| dofindlabel||| doform||| doing_taint||5.008001|n dooneliner||| doopen_pm||| doparseform||| dopoptoeval||| dopoptogiven||| dopoptolabel||| dopoptoloop||| dopoptosub_at||| dopoptowhen||| doref||5.009003| dounwind||| dowantarray||| dump_all||5.006000| dump_eval||5.006000| dump_exec_pos||| dump_fds||| dump_form||5.006000| dump_indent||5.006000|v dump_mstats||| dump_packsubs||5.006000| dump_sub||5.006000| dump_sv_child||| dump_trie_interim_list||| dump_trie_interim_table||| dump_trie||| dump_vindent||5.006000| dumpuntil||| dup_attrlist||| emulate_cop_io||| eval_pv|5.006000||p eval_sv|5.006000||p exec_failed||| expect_number||| fbm_compile||5.005000| fbm_instr||5.005000| feature_is_enabled||| fetch_cop_label||5.011000| filter_add||| filter_del||| filter_gets||| filter_read||| find_and_forget_pmops||| find_array_subscript||| find_beginning||| find_byclass||| find_hash_subscript||| find_in_my_stash||| find_runcv||5.008001| find_rundefsvoffset||5.009002| find_script||| find_uninit_var||| first_symbol|||n fold_constants||| forbid_setid||| force_ident||| force_list||| force_next||| force_version||| force_word||| forget_pmop||| form_nocontext|||vn form||5.004000|v fp_dup||| fprintf_nocontext|||vn free_global_struct||| free_tied_hv_pool||| free_tmps||| gen_constant_list||| get_arena||| get_aux_mg||| get_av|5.006000||p get_context||5.006000|n get_cvn_flags||5.009005| get_cv|5.006000||p get_db_sub||| get_debug_opts||| get_hash_seed||| get_hv|5.006000||p get_isa_hash||| get_mstats||| get_no_modify||| get_num||| get_op_descs||5.005000| get_op_names||5.005000| get_opargs||| get_ppaddr||5.006000| get_re_arg||| get_sv|5.006000||p get_vtbl||5.005030| getcwd_sv||5.007002| getenv_len||| glob_2number||| glob_assign_glob||| glob_assign_ref||| gp_dup||| gp_free||| gp_ref||| grok_bin|5.007003||p grok_hex|5.007003||p grok_number|5.007002||p grok_numeric_radix|5.007002||p grok_oct|5.007003||p group_end||| gv_AVadd||| gv_HVadd||| gv_IOadd||| gv_SVadd||| gv_autoload4||5.004000| gv_check||| gv_const_sv||5.009003| gv_dump||5.006000| gv_efullname3||5.004000| gv_efullname4||5.006001| gv_efullname||| gv_ename||| gv_fetchfile_flags||5.009005| gv_fetchfile||| gv_fetchmeth_autoload||5.007003| gv_fetchmethod_autoload||5.004000| gv_fetchmethod_flags||5.011000| gv_fetchmethod||| gv_fetchmeth||| gv_fetchpvn_flags|5.009002||p gv_fetchpvs|5.009004||p gv_fetchpv||| gv_fetchsv||5.009002| gv_fullname3||5.004000| gv_fullname4||5.006001| gv_fullname||| gv_get_super_pkg||| gv_handler||5.007001| gv_init_sv||| gv_init||| gv_name_set||5.009004| gv_stashpvn|5.004000||p gv_stashpvs|5.009003||p gv_stashpv||| gv_stashsv||| he_dup||| hek_dup||| hfreeentries||| hsplit||| hv_assert||5.011000| hv_auxinit|||n hv_backreferences_p||| hv_clear_placeholders||5.009001| hv_clear||| hv_common_key_len||5.010000| hv_common||5.010000| hv_copy_hints_hv||| hv_delayfree_ent||5.004000| hv_delete_common||| hv_delete_ent||5.004000| hv_delete||| hv_eiter_p||5.009003| hv_eiter_set||5.009003| hv_exists_ent||5.004000| hv_exists||| hv_fetch_ent||5.004000| hv_fetchs|5.009003||p hv_fetch||| hv_free_ent||5.004000| hv_iterinit||| hv_iterkeysv||5.004000| hv_iterkey||| hv_iternext_flags||5.008000| hv_iternextsv||| hv_iternext||| hv_iterval||| hv_kill_backrefs||| hv_ksplit||5.004000| hv_magic_check|||n hv_magic||| hv_name_set||5.009003| hv_notallowed||| hv_placeholders_get||5.009003| hv_placeholders_p||5.009003| hv_placeholders_set||5.009003| hv_riter_p||5.009003| hv_riter_set||5.009003| hv_scalar||5.009001| hv_store_ent||5.004000| hv_store_flags||5.008000| hv_stores|5.009004||p hv_store||| hv_undef||| ibcmp_locale||5.004000| ibcmp_utf8||5.007003| ibcmp||| incline||| incpush_if_exists||| incpush_use_sep||| incpush||| ingroup||| init_argv_symbols||| init_debugger||| init_global_struct||| init_i18nl10n||5.006000| init_i18nl14n||5.006000| init_ids||| init_interp||| init_main_stash||| init_perllib||| init_postdump_symbols||| init_predump_symbols||| init_stacks||5.005000| init_tm||5.007002| instr||| intro_my||| intuit_method||| intuit_more||| invert||| io_close||| isALNUMC|5.006000||p isALNUM||| isALPHA||| isASCII|5.006000||p isBLANK|5.006001||p isCNTRL|5.006000||p isDIGIT||| isGRAPH|5.006000||p isGV_with_GP|5.009004||p isLOWER||| isPRINT|5.004000||p isPSXSPC|5.006001||p isPUNCT|5.006000||p isSPACE||| isUPPER||| isXDIGIT|5.006000||p is_an_int||| is_gv_magical_sv||| is_handle_constructor|||n is_list_assignment||| is_lvalue_sub||5.007001| is_uni_alnum_lc||5.006000| is_uni_alnumc_lc||5.006000| is_uni_alnumc||5.006000| is_uni_alnum||5.006000| is_uni_alpha_lc||5.006000| is_uni_alpha||5.006000| is_uni_ascii_lc||5.006000| is_uni_ascii||5.006000| is_uni_cntrl_lc||5.006000| is_uni_cntrl||5.006000| is_uni_digit_lc||5.006000| is_uni_digit||5.006000| is_uni_graph_lc||5.006000| is_uni_graph||5.006000| is_uni_idfirst_lc||5.006000| is_uni_idfirst||5.006000| is_uni_lower_lc||5.006000| is_uni_lower||5.006000| is_uni_print_lc||5.006000| is_uni_print||5.006000| is_uni_punct_lc||5.006000| is_uni_punct||5.006000| is_uni_space_lc||5.006000| is_uni_space||5.006000| is_uni_upper_lc||5.006000| is_uni_upper||5.006000| is_uni_xdigit_lc||5.006000| is_uni_xdigit||5.006000| is_utf8_alnumc||5.006000| is_utf8_alnum||5.006000| is_utf8_alpha||5.006000| is_utf8_ascii||5.006000| is_utf8_char_slow|||n is_utf8_char||5.006000| is_utf8_cntrl||5.006000| is_utf8_common||| is_utf8_digit||5.006000| is_utf8_graph||5.006000| is_utf8_idcont||5.008000| is_utf8_idfirst||5.006000| is_utf8_lower||5.006000| is_utf8_mark||5.006000| is_utf8_print||5.006000| is_utf8_punct||5.006000| is_utf8_space||5.006000| is_utf8_string_loclen||5.009003| is_utf8_string_loc||5.008001| is_utf8_string||5.006001| is_utf8_upper||5.006000| is_utf8_xdigit||5.006000| isa_lookup||| items|||n ix|||n jmaybe||| join_exact||| keyword||| leave_scope||| lex_end||| lex_start||| linklist||| listkids||| list||| load_module_nocontext|||vn load_module|5.006000||pv localize||| looks_like_bool||| looks_like_number||| lop||| mPUSHi|5.009002||p mPUSHn|5.009002||p mPUSHp|5.009002||p mPUSHs|5.011000||p mPUSHu|5.009002||p mXPUSHi|5.009002||p mXPUSHn|5.009002||p mXPUSHp|5.009002||p mXPUSHs|5.011000||p mXPUSHu|5.009002||p mad_free||| madlex||| madparse||| magic_clear_all_env||| magic_clearenv||| magic_clearhint||| magic_clearisa||| magic_clearpack||| magic_clearsig||| magic_dump||5.006000| magic_existspack||| magic_freearylen_p||| magic_freeovrld||| magic_getarylen||| magic_getdefelem||| magic_getnkeys||| magic_getpack||| magic_getpos||| magic_getsig||| magic_getsubstr||| magic_gettaint||| magic_getuvar||| magic_getvec||| magic_get||| magic_killbackrefs||| magic_len||| magic_methcall||| magic_methpack||| magic_nextpack||| magic_regdata_cnt||| magic_regdatum_get||| magic_regdatum_set||| magic_scalarpack||| magic_set_all_env||| magic_setamagic||| magic_setarylen||| magic_setcollxfrm||| magic_setdbline||| magic_setdefelem||| magic_setenv||| magic_sethint||| magic_setisa||| magic_setmglob||| magic_setnkeys||| magic_setpack||| magic_setpos||| magic_setregexp||| magic_setsig||| magic_setsubstr||| magic_settaint||| magic_setutf8||| magic_setuvar||| magic_setvec||| magic_set||| magic_sizepack||| magic_wipepack||| make_matcher||| make_trie_failtable||| make_trie||| malloc_good_size|||n malloced_size|||n malloc||5.007002|n markstack_grow||| matcher_matches_sv||| measure_struct||| memEQ|5.004000||p memNE|5.004000||p mem_collxfrm||| mem_log_common|||n mess_alloc||| mess_nocontext|||vn mess||5.006000|v method_common||| mfree||5.007002|n mg_clear||| mg_copy||| mg_dup||| mg_find||| mg_free||| mg_get||| mg_length||5.005000| mg_localize||| mg_magical||| mg_set||| mg_size||5.005000| mini_mktime||5.007002| missingterm||| mode_from_discipline||| modkids||| mod||| more_bodies||| more_sv||| moreswitches||| mro_get_from_name||5.011000| mro_get_linear_isa_dfs||| mro_get_linear_isa||5.009005| mro_get_private_data||5.011000| mro_isa_changed_in||| mro_meta_dup||| mro_meta_init||| mro_method_changed_in||5.009005| mro_register||5.011000| mro_set_mro||5.011000| mro_set_private_data||5.011000| mul128||| mulexp10|||n my_atof2||5.007002| my_atof||5.006000| my_attrs||| my_bcopy|||n my_betoh16|||n my_betoh32|||n my_betoh64|||n my_betohi|||n my_betohl|||n my_betohs|||n my_bzero|||n my_chsize||| my_clearenv||| my_cxt_index||| my_cxt_init||| my_dirfd||5.009005| my_exit_jump||| my_exit||| my_failure_exit||5.004000| my_fflush_all||5.006000| my_fork||5.007003|n my_htobe16|||n my_htobe32|||n my_htobe64|||n my_htobei|||n my_htobel|||n my_htobes|||n my_htole16|||n my_htole32|||n my_htole64|||n my_htolei|||n my_htolel|||n my_htoles|||n my_htonl||| my_kid||| my_letoh16|||n my_letoh32|||n my_letoh64|||n my_letohi|||n my_letohl|||n my_letohs|||n my_lstat||| my_memcmp||5.004000|n my_memset|||n my_ntohl||| my_pclose||5.004000| my_popen_list||5.007001| my_popen||5.004000| my_setenv||| my_snprintf|5.009004||pvn my_socketpair||5.007003|n my_sprintf|5.009003||pvn my_stat||| my_strftime||5.007002| my_strlcat|5.009004||pn my_strlcpy|5.009004||pn my_swabn|||n my_swap||| my_unexec||| my_vsnprintf||5.009004|n need_utf8|||n newANONATTRSUB||5.006000| newANONHASH||| newANONLIST||| newANONSUB||| newASSIGNOP||| newATTRSUB||5.006000| newAVREF||| newAV||| newBINOP||| newCONDOP||| newCONSTSUB|5.004050||p newCVREF||| newDEFSVOP||| newFORM||| newFOROP||| newGIVENOP||5.009003| newGIVWHENOP||| newGP||| newGVOP||| newGVREF||| newGVgen||| newHVREF||| newHVhv||5.005000| newHV||| newIO||| newLISTOP||| newLOGOP||| newLOOPEX||| newLOOPOP||| newMADPROP||| newMADsv||| newMYSUB||| newNULLLIST||| newOP||| newPADOP||| newPMOP||| newPROG||| newPVOP||| newRANGE||| newRV_inc|5.004000||p newRV_noinc|5.004000||p newRV||| newSLICEOP||| newSTATEOP||| newSUB||| newSVOP||| newSVREF||| newSV_type|5.009005||p newSVhek||5.009003| newSViv||| newSVnv||| newSVpvf_nocontext|||vn newSVpvf||5.004000|v newSVpvn_flags|5.011000||p newSVpvn_share|5.007001||p newSVpvn_utf8|5.011000||p newSVpvn|5.004050||p newSVpvs_flags|5.011000||p newSVpvs_share||5.009003| newSVpvs|5.009003||p newSVpv||| newSVrv||| newSVsv||| newSVuv|5.006000||p newSV||| newTOKEN||| newUNOP||| newWHENOP||5.009003| newWHILEOP||5.009003| newXS_flags||5.009004| newXSproto||5.006000| newXS||5.006000| new_collate||5.006000| new_constant||| new_ctype||5.006000| new_he||| new_logop||| new_numeric||5.006000| new_stackinfo||5.005000| new_version||5.009000| new_warnings_bitfield||| next_symbol||| nextargv||| nextchar||| ninstr||| no_bareword_allowed||| no_fh_allowed||| no_op||| not_a_number||| nothreadhook||5.008000| nuke_stacks||| num_overflow|||n offer_nice_chunk||| oopsAV||| oopsHV||| op_clear||| op_const_sv||| op_dump||5.006000| op_free||| op_getmad_weak||| op_getmad||| op_null||5.007002| op_refcnt_dec||| op_refcnt_inc||| op_refcnt_lock||5.009002| op_refcnt_unlock||5.009002| op_xmldump||| open_script||| pMY_CXT_|5.007003||p pMY_CXT|5.007003||p pTHX_|5.006000||p pTHX|5.006000||p packWARN|5.007003||p pack_cat||5.007003| pack_rec||| package||| packlist||5.008001| pad_add_anon||| pad_add_name||| pad_alloc||| pad_block_start||| pad_check_dup||| pad_compname_type||| pad_findlex||| pad_findmy||| pad_fixup_inner_anons||| pad_free||| pad_leavemy||| pad_new||| pad_peg|||n pad_push||| pad_reset||| pad_setsv||| pad_sv||5.011000| pad_swipe||| pad_tidy||| pad_undef||| parse_body||| parse_unicode_opts||| parser_dup||| parser_free||| path_is_absolute|||n peep||| pending_Slabs_to_ro||| perl_alloc_using|||n perl_alloc|||n perl_clone_using|||n perl_clone|||n perl_construct|||n perl_destruct||5.007003|n perl_free|||n perl_parse||5.006000|n perl_run|||n pidgone||| pm_description||| pmflag||| pmop_dump||5.006000| pmop_xmldump||| pmruntime||| pmtrans||| pop_scope||| pregcomp||5.009005| pregexec||| pregfree2||5.011000| pregfree||| prepend_elem||| prepend_madprops||| printbuf||| printf_nocontext|||vn process_special_blocks||| ptr_table_clear||5.009005| ptr_table_fetch||5.009005| ptr_table_find|||n ptr_table_free||5.009005| ptr_table_new||5.009005| ptr_table_split||5.009005| ptr_table_store||5.009005| push_scope||| put_byte||| pv_display|5.006000||p pv_escape|5.009004||p pv_pretty|5.009004||p pv_uni_display||5.007003| qerror||| qsortsvu||| re_compile||5.009005| re_croak2||| re_dup_guts||| re_intuit_start||5.009005| re_intuit_string||5.006000| readpipe_override||| realloc||5.007002|n reentrant_free||| reentrant_init||| reentrant_retry|||vn reentrant_size||| ref_array_or_hash||| refcounted_he_chain_2hv||| refcounted_he_fetch||| refcounted_he_free||| refcounted_he_new_common||| refcounted_he_new||| refcounted_he_value||| refkids||| refto||| ref||5.011000| reg_check_named_buff_matched||| reg_named_buff_all||5.009005| reg_named_buff_exists||5.009005| reg_named_buff_fetch||5.009005| reg_named_buff_firstkey||5.009005| reg_named_buff_iter||| reg_named_buff_nextkey||5.009005| reg_named_buff_scalar||5.009005| reg_named_buff||| reg_namedseq||| reg_node||| reg_numbered_buff_fetch||| reg_numbered_buff_length||| reg_numbered_buff_store||| reg_qr_package||| reg_recode||| reg_scan_name||| reg_skipcomment||| reg_temp_copy||| reganode||| regatom||| regbranch||| regclass_swash||5.009004| regclass||| regcppop||| regcppush||| regcurly|||n regdump_extflags||| regdump||5.005000| regdupe_internal||| regexec_flags||5.005000| regfree_internal||5.009005| reghop3|||n reghop4|||n reghopmaybe3|||n reginclass||| reginitcolors||5.006000| reginsert||| regmatch||| regnext||5.005000| regpiece||| regpposixcc||| regprop||| regrepeat||| regtail_study||| regtail||| regtry||| reguni||| regwhite|||n reg||| repeatcpy||| report_evil_fh||| report_uninit||| require_pv||5.006000| require_tie_mod||| restore_magic||| rninstr||| rsignal_restore||| rsignal_save||| rsignal_state||5.004000| rsignal||5.004000| run_body||| run_user_filter||| runops_debug||5.005000| runops_standard||5.005000| rvpv_dup||| rxres_free||| rxres_restore||| rxres_save||| safesyscalloc||5.006000|n safesysfree||5.006000|n safesysmalloc||5.006000|n safesysrealloc||5.006000|n same_dirent||| save_I16||5.004000| save_I32||| save_I8||5.006000| save_adelete||5.011000| save_aelem||5.004050| save_alloc||5.006000| save_aptr||| save_ary||| save_bool||5.008001| save_clearsv||| save_delete||| save_destructor_x||5.006000| save_destructor||5.006000| save_freeop||| save_freepv||| save_freesv||| save_generic_pvref||5.006001| save_generic_svref||5.005030| save_gp||5.004000| save_hash||| save_hek_flags|||n save_helem_flags||5.011000| save_helem||5.004050| save_hints||| save_hptr||| save_int||| save_item||| save_iv||5.005000| save_lines||| save_list||| save_long||| save_magic||| save_mortalizesv||5.007001| save_nogv||| save_op||| save_padsv_and_mortalize||5.011000| save_pptr||| save_pushi32ptr||| save_pushptri32ptr||| save_pushptrptr||| save_pushptr||5.011000| save_re_context||5.006000| save_scalar_at||| save_scalar||| save_set_svflags||5.009000| save_shared_pvref||5.007003| save_sptr||| save_svref||| save_vptr||5.006000| savepvn||| savepvs||5.009003| savepv||| savesharedpvn||5.009005| savesharedpv||5.007003| savestack_grow_cnt||5.008001| savestack_grow||| savesvpv||5.009002| sawparens||| scalar_mod_type|||n scalarboolean||| scalarkids||| scalarseq||| scalarvoid||| scalar||| scan_bin||5.006000| scan_commit||| scan_const||| scan_formline||| scan_heredoc||| scan_hex||| scan_ident||| scan_inputsymbol||| scan_num||5.007001| scan_oct||| scan_pat||| scan_str||| scan_subst||| scan_trans||| scan_version||5.009001| scan_vstring||5.009005| scan_word||| scope||| screaminstr||5.005000| search_const||| seed||5.008001| sequence_num||| sequence_tail||| sequence||| set_context||5.006000|n set_numeric_local||5.006000| set_numeric_radix||5.006000| set_numeric_standard||5.006000| setdefout||| share_hek_flags||| share_hek||5.004000| si_dup||| sighandler|||n simplify_sort||| skipspace0||| skipspace1||| skipspace2||| skipspace||| softref2xv||| sortcv_stacked||| sortcv_xsub||| sortcv||| sortsv_flags||5.009003| sortsv||5.007003| space_join_names_mortal||| ss_dup||| stack_grow||| start_force||| start_glob||| start_subparse||5.004000| stashpv_hvname_match||5.011000| stdize_locale||| store_cop_label||| strEQ||| strGE||| strGT||| strLE||| strLT||| strNE||| str_to_version||5.006000| strip_return||| strnEQ||| strnNE||| study_chunk||| sub_crush_depth||| sublex_done||| sublex_push||| sublex_start||| sv_2bool||| sv_2cv||| sv_2io||| sv_2iuv_common||| sv_2iuv_non_preserve||| sv_2iv_flags||5.009001| sv_2iv||| sv_2mortal||| sv_2num||| sv_2nv||| sv_2pv_flags|5.007002||p sv_2pv_nolen|5.006000||p sv_2pvbyte_nolen|5.006000||p sv_2pvbyte|5.006000||p sv_2pvutf8_nolen||5.006000| sv_2pvutf8||5.006000| sv_2pv||| sv_2uv_flags||5.009001| sv_2uv|5.004000||p sv_add_arena||| sv_add_backref||| sv_backoff||| sv_bless||| sv_cat_decode||5.008001| sv_catpv_mg|5.004050||p sv_catpvf_mg_nocontext|||pvn sv_catpvf_mg|5.006000|5.004000|pv sv_catpvf_nocontext|||vn sv_catpvf||5.004000|v sv_catpvn_flags||5.007002| sv_catpvn_mg|5.004050||p sv_catpvn_nomg|5.007002||p sv_catpvn||| sv_catpvs|5.009003||p sv_catpv||| sv_catsv_flags||5.007002| sv_catsv_mg|5.004050||p sv_catsv_nomg|5.007002||p sv_catsv||| sv_catxmlpvn||| sv_catxmlsv||| sv_chop||| sv_clean_all||| sv_clean_objs||| sv_clear||| sv_cmp_locale||5.004000| sv_cmp||| sv_collxfrm||| sv_compile_2op||5.008001| sv_copypv||5.007003| sv_dec||| sv_del_backref||| sv_derived_from||5.004000| sv_destroyable||5.010000| sv_does||5.009004| sv_dump||| sv_dup_inc_multiple||| sv_dup||| sv_eq||| sv_exp_grow||| sv_force_normal_flags||5.007001| sv_force_normal||5.006000| sv_free2||| sv_free_arenas||| sv_free||| sv_gets||5.004000| sv_grow||| sv_i_ncmp||| sv_inc||| sv_insert_flags||5.011000| sv_insert||| sv_isa||| sv_isobject||| sv_iv||5.005000| sv_kill_backrefs||| sv_len_utf8||5.006000| sv_len||| sv_magic_portable|5.011000|5.004000|p sv_magicext||5.007003| sv_magic||| sv_mortalcopy||| sv_ncmp||| sv_newmortal||| sv_newref||| sv_nolocking||5.007003| sv_nosharing||5.007003| sv_nounlocking||| sv_nv||5.005000| sv_peek||5.005000| sv_pos_b2u_midway||| sv_pos_b2u||5.006000| sv_pos_u2b_cached||| sv_pos_u2b_forwards|||n sv_pos_u2b_midway|||n sv_pos_u2b||5.006000| sv_pvbyten_force||5.006000| sv_pvbyten||5.006000| sv_pvbyte||5.006000| sv_pvn_force_flags|5.007002||p sv_pvn_force||| sv_pvn_nomg|5.007003|5.005000|p sv_pvn||5.005000| sv_pvutf8n_force||5.006000| sv_pvutf8n||5.006000| sv_pvutf8||5.006000| sv_pv||5.006000| sv_recode_to_utf8||5.007003| sv_reftype||| sv_release_COW||| sv_replace||| sv_report_used||| sv_reset||| sv_rvweaken||5.006000| sv_setiv_mg|5.004050||p sv_setiv||| sv_setnv_mg|5.006000||p sv_setnv||| sv_setpv_mg|5.004050||p sv_setpvf_mg_nocontext|||pvn sv_setpvf_mg|5.006000|5.004000|pv sv_setpvf_nocontext|||vn sv_setpvf||5.004000|v sv_setpviv_mg||5.008001| sv_setpviv||5.008001| sv_setpvn_mg|5.004050||p sv_setpvn||| sv_setpvs|5.009004||p sv_setpv||| sv_setref_iv||| sv_setref_nv||| sv_setref_pvn||| sv_setref_pv||| sv_setref_uv||5.007001| sv_setsv_cow||| sv_setsv_flags||5.007002| sv_setsv_mg|5.004050||p sv_setsv_nomg|5.007002||p sv_setsv||| sv_setuv_mg|5.004050||p sv_setuv|5.004000||p sv_tainted||5.004000| sv_taint||5.004000| sv_true||5.005000| sv_unglob||| sv_uni_display||5.007003| sv_unmagic||| sv_unref_flags||5.007001| sv_unref||| sv_untaint||5.004000| sv_upgrade||| sv_usepvn_flags||5.009004| sv_usepvn_mg|5.004050||p sv_usepvn||| sv_utf8_decode||5.006000| sv_utf8_downgrade||5.006000| sv_utf8_encode||5.006000| sv_utf8_upgrade_flags_grow||5.011000| sv_utf8_upgrade_flags||5.007002| sv_utf8_upgrade_nomg||5.007002| sv_utf8_upgrade||5.007001| sv_uv|5.005000||p sv_vcatpvf_mg|5.006000|5.004000|p sv_vcatpvfn||5.004000| sv_vcatpvf|5.006000|5.004000|p sv_vsetpvf_mg|5.006000|5.004000|p sv_vsetpvfn||5.004000| sv_vsetpvf|5.006000|5.004000|p sv_xmlpeek||| svtype||| swallow_bom||| swap_match_buff||| swash_fetch||5.007002| swash_get||| swash_init||5.006000| sys_init3||5.010000|n sys_init||5.010000|n sys_intern_clear||| sys_intern_dup||| sys_intern_init||| sys_term||5.010000|n taint_env||| taint_proper||| tmps_grow||5.006000| toLOWER||| toUPPER||| to_byte_substr||| to_uni_fold||5.007003| to_uni_lower_lc||5.006000| to_uni_lower||5.007003| to_uni_title_lc||5.006000| to_uni_title||5.007003| to_uni_upper_lc||5.006000| to_uni_upper||5.007003| to_utf8_case||5.007003| to_utf8_fold||5.007003| to_utf8_lower||5.007003| to_utf8_substr||| to_utf8_title||5.007003| to_utf8_upper||5.007003| token_free||| token_getmad||| tokenize_use||| tokeq||| tokereport||| too_few_arguments||| too_many_arguments||| uiv_2buf|||n unlnk||| unpack_rec||| unpack_str||5.007003| unpackstring||5.008001| unshare_hek_or_pvn||| unshare_hek||| unsharepvn||5.004000| unwind_handler_stack||| update_debugger_info||| upg_version||5.009005| usage||| utf16_to_utf8_reversed||5.006001| utf16_to_utf8||5.006001| utf8_distance||5.006000| utf8_hop||5.006000| utf8_length||5.007001| utf8_mg_pos_cache_update||| utf8_to_bytes||5.006001| utf8_to_uvchr||5.007001| utf8_to_uvuni||5.007001| utf8n_to_uvchr||| utf8n_to_uvuni||5.007001| utilize||| uvchr_to_utf8_flags||5.007003| uvchr_to_utf8||| uvuni_to_utf8_flags||5.007003| uvuni_to_utf8||5.007001| validate_suid||| varname||| vcmp||5.009000| vcroak||5.006000| vdeb||5.007003| vdie_common||| vdie_croak_common||| vdie||| vform||5.006000| visit||| vivify_defelem||| vivify_ref||| vload_module|5.006000||p vmess||5.006000| vnewSVpvf|5.006000|5.004000|p vnormal||5.009002| vnumify||5.009000| vstringify||5.009000| vverify||5.009003| vwarner||5.006000| vwarn||5.006000| wait4pid||| warn_nocontext|||vn warner_nocontext|||vn warner|5.006000|5.004000|pv warn|||v watch||| whichsig||| write_no_mem||| write_to_stderr||| xmldump_all||| xmldump_attr||| xmldump_eval||| xmldump_form||| xmldump_indent|||v xmldump_packsubs||| xmldump_sub||| xmldump_vindent||| yyerror||| yylex||| yyparse||| yywarn||| ); if (exists $opt{'list-unsupported'}) { my $f; for $f (sort { lc $a cmp lc $b } keys %API) { next unless $API{$f}{todo}; print "$f ", '.'x(40-length($f)), " ", format_version($API{$f}{todo}), "\n"; } exit 0; } # Scan for possible replacement candidates my(%replace, %need, %hints, %warnings, %depends); my $replace = 0; my($hint, $define, $function); sub find_api { my $code = shift; $code =~ s{ / (?: \*[^*]*\*+(?:[^$ccs][^*]*\*+)* / | /[^\r\n]*) | "[^"\\]*(?:\\.[^"\\]*)*" | '[^'\\]*(?:\\.[^'\\]*)*' }{}egsx; grep { exists $API{$_} } $code =~ /(\w+)/mg; } while () { if ($hint) { my $h = $hint->[0] eq 'Hint' ? \%hints : \%warnings; if (m{^\s*\*\s(.*?)\s*$}) { for (@{$hint->[1]}) { $h->{$_} ||= ''; # suppress warning with older perls $h->{$_} .= "$1\n"; } } else { undef $hint } } $hint = [$1, [split /,?\s+/, $2]] if m{^\s*$rccs\s+(Hint|Warning):\s+(\w+(?:,?\s+\w+)*)\s*$}; if ($define) { if ($define->[1] =~ /\\$/) { $define->[1] .= $_; } else { if (exists $API{$define->[0]} && $define->[1] !~ /^DPPP_\(/) { my @n = find_api($define->[1]); push @{$depends{$define->[0]}}, @n if @n } undef $define; } } $define = [$1, $2] if m{^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s+(.*)}; if ($function) { if (/^}/) { if (exists $API{$function->[0]}) { my @n = find_api($function->[1]); push @{$depends{$function->[0]}}, @n if @n } undef $function; } else { $function->[1] .= $_; } } $function = [$1, ''] if m{^DPPP_\(my_(\w+)\)}; $replace = $1 if m{^\s*$rccs\s+Replace:\s+(\d+)\s+$rcce\s*$}; $replace{$2} = $1 if $replace and m{^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s+(\w+)}; $replace{$2} = $1 if m{^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s+(\w+).*$rccs\s+Replace\s+$rcce}; $replace{$1} = $2 if m{^\s*$rccs\s+Replace (\w+) with (\w+)\s+$rcce\s*$}; if (m{^\s*$rccs\s+(\w+(\s*,\s*\w+)*)\s+depends\s+on\s+(\w+(\s*,\s*\w+)*)\s+$rcce\s*$}) { my @deps = map { s/\s+//g; $_ } split /,/, $3; my $d; for $d (map { s/\s+//g; $_ } split /,/, $1) { push @{$depends{$d}}, @deps; } } $need{$1} = 1 if m{^#if\s+defined\(NEED_(\w+)(?:_GLOBAL)?\)}; } for (values %depends) { my %s; $_ = [sort grep !$s{$_}++, @$_]; } if (exists $opt{'api-info'}) { my $f; my $count = 0; my $match = $opt{'api-info'} =~ m!^/(.*)/$! ? $1 : "^\Q$opt{'api-info'}\E\$"; for $f (sort { lc $a cmp lc $b } keys %API) { next unless $f =~ /$match/; print "\n=== $f ===\n\n"; my $info = 0; if ($API{$f}{base} || $API{$f}{todo}) { my $base = format_version($API{$f}{base} || $API{$f}{todo}); print "Supported at least starting from perl-$base.\n"; $info++; } if ($API{$f}{provided}) { my $todo = $API{$f}{todo} ? format_version($API{$f}{todo}) : "5.003"; print "Support by $ppport provided back to perl-$todo.\n"; print "Support needs to be explicitly requested by NEED_$f.\n" if exists $need{$f}; print "Depends on: ", join(', ', @{$depends{$f}}), ".\n" if exists $depends{$f}; print "\n$hints{$f}" if exists $hints{$f}; print "\nWARNING:\n$warnings{$f}" if exists $warnings{$f}; $info++; } print "No portability information available.\n" unless $info; $count++; } $count or print "Found no API matching '$opt{'api-info'}'."; print "\n"; exit 0; } if (exists $opt{'list-provided'}) { my $f; for $f (sort { lc $a cmp lc $b } keys %API) { next unless $API{$f}{provided}; my @flags; push @flags, 'explicit' if exists $need{$f}; push @flags, 'depend' if exists $depends{$f}; push @flags, 'hint' if exists $hints{$f}; push @flags, 'warning' if exists $warnings{$f}; my $flags = @flags ? ' ['.join(', ', @flags).']' : ''; print "$f$flags\n"; } exit 0; } my @files; my @srcext = qw( .xs .c .h .cc .cpp -c.inc -xs.inc ); my $srcext = join '|', map { quotemeta $_ } @srcext; if (@ARGV) { my %seen; for (@ARGV) { if (-e) { if (-f) { push @files, $_ unless $seen{$_}++; } else { warn "'$_' is not a file.\n" } } else { my @new = grep { -f } glob $_ or warn "'$_' does not exist.\n"; push @files, grep { !$seen{$_}++ } @new; } } } else { eval { require File::Find; File::Find::find(sub { $File::Find::name =~ /($srcext)$/i and push @files, $File::Find::name; }, '.'); }; if ($@) { @files = map { glob "*$_" } @srcext; } } if (!@ARGV || $opt{filter}) { my(@in, @out); my %xsc = map { /(.*)\.xs$/ ? ("$1.c" => 1, "$1.cc" => 1) : () } @files; for (@files) { my $out = exists $xsc{$_} || /\b\Q$ppport\E$/i || !/($srcext)$/i; push @{ $out ? \@out : \@in }, $_; } if (@ARGV && @out) { warning("Skipping the following files (use --nofilter to avoid this):\n| ", join "\n| ", @out); } @files = @in; } die "No input files given!\n" unless @files; my(%files, %global, %revreplace); %revreplace = reverse %replace; my $filename; my $patch_opened = 0; for $filename (@files) { unless (open IN, "<$filename") { warn "Unable to read from $filename: $!\n"; next; } info("Scanning $filename ..."); my $c = do { local $/; }; close IN; my %file = (orig => $c, changes => 0); # Temporarily remove C/XS comments and strings from the code my @ccom; $c =~ s{ ( ^$HS*\#$HS*include\b[^\r\n]+\b(?:\Q$ppport\E|XSUB\.h)\b[^\r\n]* | ^$HS*\#$HS*(?:define|elif|if(?:def)?)\b[^\r\n]* ) | ( ^$HS*\#[^\r\n]* | "[^"\\]*(?:\\.[^"\\]*)*" | '[^'\\]*(?:\\.[^'\\]*)*' | / (?: \*[^*]*\*+(?:[^$ccs][^*]*\*+)* / | /[^\r\n]* ) ) }{ defined $2 and push @ccom, $2; defined $1 ? $1 : "$ccs$#ccom$cce" }mgsex; $file{ccom} = \@ccom; $file{code} = $c; $file{has_inc_ppport} = $c =~ /^$HS*#$HS*include[^\r\n]+\b\Q$ppport\E\b/m; my $func; for $func (keys %API) { my $match = $func; $match .= "|$revreplace{$func}" if exists $revreplace{$func}; if ($c =~ /\b(?:Perl_)?($match)\b/) { $file{uses_replace}{$1}++ if exists $revreplace{$func} && $1 eq $revreplace{$func}; $file{uses_Perl}{$func}++ if $c =~ /\bPerl_$func\b/; if (exists $API{$func}{provided}) { $file{uses_provided}{$func}++; if (!exists $API{$func}{base} || $API{$func}{base} > $opt{'compat-version'}) { $file{uses}{$func}++; my @deps = rec_depend($func); if (@deps) { $file{uses_deps}{$func} = \@deps; for (@deps) { $file{uses}{$_} = 0 unless exists $file{uses}{$_}; } } for ($func, @deps) { $file{needs}{$_} = 'static' if exists $need{$_}; } } } if (exists $API{$func}{todo} && $API{$func}{todo} > $opt{'compat-version'}) { if ($c =~ /\b$func\b/) { $file{uses_todo}{$func}++; } } } } while ($c =~ /^$HS*#$HS*define$HS+(NEED_(\w+?)(_GLOBAL)?)\b/mg) { if (exists $need{$2}) { $file{defined $3 ? 'needed_global' : 'needed_static'}{$2}++; } else { warning("Possibly wrong #define $1 in $filename") } } for (qw(uses needs uses_todo needed_global needed_static)) { for $func (keys %{$file{$_}}) { push @{$global{$_}{$func}}, $filename; } } $files{$filename} = \%file; } # Globally resolve NEED_'s my $need; for $need (keys %{$global{needs}}) { if (@{$global{needs}{$need}} > 1) { my @targets = @{$global{needs}{$need}}; my @t = grep $files{$_}{needed_global}{$need}, @targets; @targets = @t if @t; @t = grep /\.xs$/i, @targets; @targets = @t if @t; my $target = shift @targets; $files{$target}{needs}{$need} = 'global'; for (@{$global{needs}{$need}}) { $files{$_}{needs}{$need} = 'extern' if $_ ne $target; } } } for $filename (@files) { exists $files{$filename} or next; info("=== Analyzing $filename ==="); my %file = %{$files{$filename}}; my $func; my $c = $file{code}; my $warnings = 0; for $func (sort keys %{$file{uses_Perl}}) { if ($API{$func}{varargs}) { unless ($API{$func}{nothxarg}) { my $changes = ($c =~ s{\b(Perl_$func\s*\(\s*)(?!aTHX_?)(\)|[^\s)]*\))} { $1 . ($2 eq ')' ? 'aTHX' : 'aTHX_ ') . $2 }ge); if ($changes) { warning("Doesn't pass interpreter argument aTHX to Perl_$func"); $file{changes} += $changes; } } } else { warning("Uses Perl_$func instead of $func"); $file{changes} += ($c =~ s{\bPerl_$func(\s*)\((\s*aTHX_?)?\s*} {$func$1(}g); } } for $func (sort keys %{$file{uses_replace}}) { warning("Uses $func instead of $replace{$func}"); $file{changes} += ($c =~ s/\b$func\b/$replace{$func}/g); } for $func (sort keys %{$file{uses_provided}}) { if ($file{uses}{$func}) { if (exists $file{uses_deps}{$func}) { diag("Uses $func, which depends on ", join(', ', @{$file{uses_deps}{$func}})); } else { diag("Uses $func"); } } $warnings += hint($func); } unless ($opt{quiet}) { for $func (sort keys %{$file{uses_todo}}) { print "*** WARNING: Uses $func, which may not be portable below perl ", format_version($API{$func}{todo}), ", even with '$ppport'\n"; $warnings++; } } for $func (sort keys %{$file{needed_static}}) { my $message = ''; if (not exists $file{uses}{$func}) { $message = "No need to define NEED_$func if $func is never used"; } elsif (exists $file{needs}{$func} && $file{needs}{$func} ne 'static') { $message = "No need to define NEED_$func when already needed globally"; } if ($message) { diag($message); $file{changes} += ($c =~ s/^$HS*#$HS*define$HS+NEED_$func\b.*$LF//mg); } } for $func (sort keys %{$file{needed_global}}) { my $message = ''; if (not exists $global{uses}{$func}) { $message = "No need to define NEED_${func}_GLOBAL if $func is never used"; } elsif (exists $file{needs}{$func}) { if ($file{needs}{$func} eq 'extern') { $message = "No need to define NEED_${func}_GLOBAL when already needed globally"; } elsif ($file{needs}{$func} eq 'static') { $message = "No need to define NEED_${func}_GLOBAL when only used in this file"; } } if ($message) { diag($message); $file{changes} += ($c =~ s/^$HS*#$HS*define$HS+NEED_${func}_GLOBAL\b.*$LF//mg); } } $file{needs_inc_ppport} = keys %{$file{uses}}; if ($file{needs_inc_ppport}) { my $pp = ''; for $func (sort keys %{$file{needs}}) { my $type = $file{needs}{$func}; next if $type eq 'extern'; my $suffix = $type eq 'global' ? '_GLOBAL' : ''; unless (exists $file{"needed_$type"}{$func}) { if ($type eq 'global') { diag("Files [@{$global{needs}{$func}}] need $func, adding global request"); } else { diag("File needs $func, adding static request"); } $pp .= "#define NEED_$func$suffix\n"; } } if ($pp && ($c =~ s/^(?=$HS*#$HS*define$HS+NEED_\w+)/$pp/m)) { $pp = ''; $file{changes}++; } unless ($file{has_inc_ppport}) { diag("Needs to include '$ppport'"); $pp .= qq(#include "$ppport"\n) } if ($pp) { $file{changes} += ($c =~ s/^($HS*#$HS*define$HS+NEED_\w+.*?)^/$1$pp/ms) || ($c =~ s/^(?=$HS*#$HS*include.*\Q$ppport\E)/$pp/m) || ($c =~ s/^($HS*#$HS*include.*XSUB.*\s*?)^/$1$pp/m) || ($c =~ s/^/$pp/); } } else { if ($file{has_inc_ppport}) { diag("No need to include '$ppport'"); $file{changes} += ($c =~ s/^$HS*?#$HS*include.*\Q$ppport\E.*?$LF//m); } } # put back in our C comments my $ix; my $cppc = 0; my @ccom = @{$file{ccom}}; for $ix (0 .. $#ccom) { if (!$opt{cplusplus} && $ccom[$ix] =~ s!^//!!) { $cppc++; $file{changes} += $c =~ s/$rccs$ix$rcce/$ccs$ccom[$ix] $cce/; } else { $c =~ s/$rccs$ix$rcce/$ccom[$ix]/; } } if ($cppc) { my $s = $cppc != 1 ? 's' : ''; warning("Uses $cppc C++ style comment$s, which is not portable"); } my $s = $warnings != 1 ? 's' : ''; my $warn = $warnings ? " ($warnings warning$s)" : ''; info("Analysis completed$warn"); if ($file{changes}) { if (exists $opt{copy}) { my $newfile = "$filename$opt{copy}"; if (-e $newfile) { error("'$newfile' already exists, refusing to write copy of '$filename'"); } else { local *F; if (open F, ">$newfile") { info("Writing copy of '$filename' with changes to '$newfile'"); print F $c; close F; } else { error("Cannot open '$newfile' for writing: $!"); } } } elsif (exists $opt{patch} || $opt{changes}) { if (exists $opt{patch}) { unless ($patch_opened) { if (open PATCH, ">$opt{patch}") { $patch_opened = 1; } else { error("Cannot open '$opt{patch}' for writing: $!"); delete $opt{patch}; $opt{changes} = 1; goto fallback; } } mydiff(\*PATCH, $filename, $c); } else { fallback: info("Suggested changes:"); mydiff(\*STDOUT, $filename, $c); } } else { my $s = $file{changes} == 1 ? '' : 's'; info("$file{changes} potentially required change$s detected"); } } else { info("Looks good"); } } close PATCH if $patch_opened; exit 0; sub try_use { eval "use @_;"; return $@ eq '' } sub mydiff { local *F = shift; my($file, $str) = @_; my $diff; if (exists $opt{diff}) { $diff = run_diff($opt{diff}, $file, $str); } if (!defined $diff and try_use('Text::Diff')) { $diff = Text::Diff::diff($file, \$str, { STYLE => 'Unified' }); $diff = <
$tmp") { print F $str; close F; if (open F, "$prog $file $tmp |") { while () { s/\Q$tmp\E/$file.patched/; $diff .= $_; } close F; unlink $tmp; return $diff; } unlink $tmp; } else { error("Cannot open '$tmp' for writing: $!"); } return undef; } sub rec_depend { my($func, $seen) = @_; return () unless exists $depends{$func}; $seen = {%{$seen||{}}}; return () if $seen->{$func}++; my %s; grep !$s{$_}++, map { ($_, rec_depend($_, $seen)) } @{$depends{$func}}; } sub parse_version { my $ver = shift; if ($ver =~ /^(\d+)\.(\d+)\.(\d+)$/) { return ($1, $2, $3); } elsif ($ver !~ /^\d+\.[\d_]+$/) { die "cannot parse version '$ver'\n"; } $ver =~ s/_//g; $ver =~ s/$/000000/; my($r,$v,$s) = $ver =~ /(\d+)\.(\d{3})(\d{3})/; $v = int $v; $s = int $s; if ($r < 5 || ($r == 5 && $v < 6)) { if ($s % 10) { die "cannot parse version '$ver'\n"; } } return ($r, $v, $s); } sub format_version { my $ver = shift; $ver =~ s/$/000000/; my($r,$v,$s) = $ver =~ /(\d+)\.(\d{3})(\d{3})/; $v = int $v; $s = int $s; if ($r < 5 || ($r == 5 && $v < 6)) { if ($s % 10) { die "invalid version '$ver'\n"; } $s /= 10; $ver = sprintf "%d.%03d", $r, $v; $s > 0 and $ver .= sprintf "_%02d", $s; return $ver; } return sprintf "%d.%d.%d", $r, $v, $s; } sub info { $opt{quiet} and return; print @_, "\n"; } sub diag { $opt{quiet} and return; $opt{diag} and print @_, "\n"; } sub warning { $opt{quiet} and return; print "*** ", @_, "\n"; } sub error { print "*** ERROR: ", @_, "\n"; } my %given_hints; my %given_warnings; sub hint { $opt{quiet} and return; my $func = shift; my $rv = 0; if (exists $warnings{$func} && !$given_warnings{$func}++) { my $warn = $warnings{$func}; $warn =~ s!^!*** !mg; print "*** WARNING: $func\n", $warn; $rv++; } if ($opt{hints} && exists $hints{$func} && !$given_hints{$func}++) { my $hint = $hints{$func}; $hint =~ s/^/ /mg; print " --- hint for $func ---\n", $hint; } $rv; } sub usage { my($usage) = do { local(@ARGV,$/)=($0); <> } =~ /^=head\d$HS+SYNOPSIS\s*^(.*?)\s*^=/ms; my %M = ( 'I' => '*' ); $usage =~ s/^\s*perl\s+\S+/$^X $0/; $usage =~ s/([A-Z])<([^>]+)>/$M{$1}$2$M{$1}/g; print < }; my($copy) = $self =~ /^=head\d\s+COPYRIGHT\s*^(.*?)^=\w+/ms; $copy =~ s/^(?=\S+)/ /gms; $self =~ s/^$HS+Do NOT edit.*?(?=^-)/$copy/ms; $self =~ s/^SKIP.*(?=^__DATA__)/SKIP if (\@ARGV && \$ARGV[0] eq '--unstrip') { eval { require Devel::PPPort }; \$@ and die "Cannot require Devel::PPPort, please install.\\n"; if (eval \$Devel::PPPort::VERSION < $VERSION) { die "$0 was originally generated with Devel::PPPort $VERSION.\\n" . "Your Devel::PPPort is only version \$Devel::PPPort::VERSION.\\n" . "Please install a newer version, or --unstrip will not work.\\n"; } Devel::PPPort::WriteFile(\$0); exit 0; } print <$0" or die "cannot strip $0: $!\n"; print OUT "$pl$c\n"; exit 0; } __DATA__ */ #ifndef _P_P_PORTABILITY_H_ #define _P_P_PORTABILITY_H_ #ifndef DPPP_NAMESPACE # define DPPP_NAMESPACE DPPP_ #endif #define DPPP_CAT2(x,y) CAT2(x,y) #define DPPP_(name) DPPP_CAT2(DPPP_NAMESPACE, name) #ifndef PERL_REVISION # if !defined(__PATCHLEVEL_H_INCLUDED__) && !(defined(PATCHLEVEL) && defined(SUBVERSION)) # define PERL_PATCHLEVEL_H_IMPLICIT # include # endif # if !(defined(PERL_VERSION) || (defined(SUBVERSION) && defined(PATCHLEVEL))) # include # endif # ifndef PERL_REVISION # define PERL_REVISION (5) /* Replace: 1 */ # define PERL_VERSION PATCHLEVEL # define PERL_SUBVERSION SUBVERSION /* Replace PERL_PATCHLEVEL with PERL_VERSION */ /* Replace: 0 */ # endif #endif #define _dpppDEC2BCD(dec) ((((dec)/100)<<8)|((((dec)%100)/10)<<4)|((dec)%10)) #define PERL_BCDVERSION ((_dpppDEC2BCD(PERL_REVISION)<<24)|(_dpppDEC2BCD(PERL_VERSION)<<12)|_dpppDEC2BCD(PERL_SUBVERSION)) /* It is very unlikely that anyone will try to use this with Perl 6 (or greater), but who knows. */ #if PERL_REVISION != 5 # error ppport.h only works with Perl version 5 #endif /* PERL_REVISION != 5 */ #ifndef dTHR # define dTHR dNOOP #endif #ifndef dTHX # define dTHX dNOOP #endif #ifndef dTHXa # define dTHXa(x) dNOOP #endif #ifndef pTHX # define pTHX void #endif #ifndef pTHX_ # define pTHX_ #endif #ifndef aTHX # define aTHX #endif #ifndef aTHX_ # define aTHX_ #endif #if (PERL_BCDVERSION < 0x5006000) # ifdef USE_THREADS # define aTHXR thr # define aTHXR_ thr, # else # define aTHXR # define aTHXR_ # endif # define dTHXR dTHR #else # define aTHXR aTHX # define aTHXR_ aTHX_ # define dTHXR dTHX #endif #ifndef dTHXoa # define dTHXoa(x) dTHXa(x) #endif #ifdef I_LIMITS # include #endif #ifndef PERL_UCHAR_MIN # define PERL_UCHAR_MIN ((unsigned char)0) #endif #ifndef PERL_UCHAR_MAX # ifdef UCHAR_MAX # define PERL_UCHAR_MAX ((unsigned char)UCHAR_MAX) # else # ifdef MAXUCHAR # define PERL_UCHAR_MAX ((unsigned char)MAXUCHAR) # else # define PERL_UCHAR_MAX ((unsigned char)~(unsigned)0) # endif # endif #endif #ifndef PERL_USHORT_MIN # define PERL_USHORT_MIN ((unsigned short)0) #endif #ifndef PERL_USHORT_MAX # ifdef USHORT_MAX # define PERL_USHORT_MAX ((unsigned short)USHORT_MAX) # else # ifdef MAXUSHORT # define PERL_USHORT_MAX ((unsigned short)MAXUSHORT) # else # ifdef USHRT_MAX # define PERL_USHORT_MAX ((unsigned short)USHRT_MAX) # else # define PERL_USHORT_MAX ((unsigned short)~(unsigned)0) # endif # endif # endif #endif #ifndef PERL_SHORT_MAX # ifdef SHORT_MAX # define PERL_SHORT_MAX ((short)SHORT_MAX) # else # ifdef MAXSHORT /* Often used in */ # define PERL_SHORT_MAX ((short)MAXSHORT) # else # ifdef SHRT_MAX # define PERL_SHORT_MAX ((short)SHRT_MAX) # else # define PERL_SHORT_MAX ((short) (PERL_USHORT_MAX >> 1)) # endif # endif # endif #endif #ifndef PERL_SHORT_MIN # ifdef SHORT_MIN # define PERL_SHORT_MIN ((short)SHORT_MIN) # else # ifdef MINSHORT # define PERL_SHORT_MIN ((short)MINSHORT) # else # ifdef SHRT_MIN # define PERL_SHORT_MIN ((short)SHRT_MIN) # else # define PERL_SHORT_MIN (-PERL_SHORT_MAX - ((3 & -1) == 3)) # endif # endif # endif #endif #ifndef PERL_UINT_MAX # ifdef UINT_MAX # define PERL_UINT_MAX ((unsigned int)UINT_MAX) # else # ifdef MAXUINT # define PERL_UINT_MAX ((unsigned int)MAXUINT) # else # define PERL_UINT_MAX (~(unsigned int)0) # endif # endif #endif #ifndef PERL_UINT_MIN # define PERL_UINT_MIN ((unsigned int)0) #endif #ifndef PERL_INT_MAX # ifdef INT_MAX # define PERL_INT_MAX ((int)INT_MAX) # else # ifdef MAXINT /* Often used in */ # define PERL_INT_MAX ((int)MAXINT) # else # define PERL_INT_MAX ((int)(PERL_UINT_MAX >> 1)) # endif # endif #endif #ifndef PERL_INT_MIN # ifdef INT_MIN # define PERL_INT_MIN ((int)INT_MIN) # else # ifdef MININT # define PERL_INT_MIN ((int)MININT) # else # define PERL_INT_MIN (-PERL_INT_MAX - ((3 & -1) == 3)) # endif # endif #endif #ifndef PERL_ULONG_MAX # ifdef ULONG_MAX # define PERL_ULONG_MAX ((unsigned long)ULONG_MAX) # else # ifdef MAXULONG # define PERL_ULONG_MAX ((unsigned long)MAXULONG) # else # define PERL_ULONG_MAX (~(unsigned long)0) # endif # endif #endif #ifndef PERL_ULONG_MIN # define PERL_ULONG_MIN ((unsigned long)0L) #endif #ifndef PERL_LONG_MAX # ifdef LONG_MAX # define PERL_LONG_MAX ((long)LONG_MAX) # else # ifdef MAXLONG # define PERL_LONG_MAX ((long)MAXLONG) # else # define PERL_LONG_MAX ((long) (PERL_ULONG_MAX >> 1)) # endif # endif #endif #ifndef PERL_LONG_MIN # ifdef LONG_MIN # define PERL_LONG_MIN ((long)LONG_MIN) # else # ifdef MINLONG # define PERL_LONG_MIN ((long)MINLONG) # else # define PERL_LONG_MIN (-PERL_LONG_MAX - ((3 & -1) == 3)) # endif # endif #endif #if defined(HAS_QUAD) && (defined(convex) || defined(uts)) # ifndef PERL_UQUAD_MAX # ifdef ULONGLONG_MAX # define PERL_UQUAD_MAX ((unsigned long long)ULONGLONG_MAX) # else # ifdef MAXULONGLONG # define PERL_UQUAD_MAX ((unsigned long long)MAXULONGLONG) # else # define PERL_UQUAD_MAX (~(unsigned long long)0) # endif # endif # endif # ifndef PERL_UQUAD_MIN # define PERL_UQUAD_MIN ((unsigned long long)0L) # endif # ifndef PERL_QUAD_MAX # ifdef LONGLONG_MAX # define PERL_QUAD_MAX ((long long)LONGLONG_MAX) # else # ifdef MAXLONGLONG # define PERL_QUAD_MAX ((long long)MAXLONGLONG) # else # define PERL_QUAD_MAX ((long long) (PERL_UQUAD_MAX >> 1)) # endif # endif # endif # ifndef PERL_QUAD_MIN # ifdef LONGLONG_MIN # define PERL_QUAD_MIN ((long long)LONGLONG_MIN) # else # ifdef MINLONGLONG # define PERL_QUAD_MIN ((long long)MINLONGLONG) # else # define PERL_QUAD_MIN (-PERL_QUAD_MAX - ((3 & -1) == 3)) # endif # endif # endif #endif /* This is based on code from 5.003 perl.h */ #ifdef HAS_QUAD # ifdef cray #ifndef IVTYPE # define IVTYPE int #endif #ifndef IV_MIN # define IV_MIN PERL_INT_MIN #endif #ifndef IV_MAX # define IV_MAX PERL_INT_MAX #endif #ifndef UV_MIN # define UV_MIN PERL_UINT_MIN #endif #ifndef UV_MAX # define UV_MAX PERL_UINT_MAX #endif # ifdef INTSIZE #ifndef IVSIZE # define IVSIZE INTSIZE #endif # endif # else # if defined(convex) || defined(uts) #ifndef IVTYPE # define IVTYPE long long #endif #ifndef IV_MIN # define IV_MIN PERL_QUAD_MIN #endif #ifndef IV_MAX # define IV_MAX PERL_QUAD_MAX #endif #ifndef UV_MIN # define UV_MIN PERL_UQUAD_MIN #endif #ifndef UV_MAX # define UV_MAX PERL_UQUAD_MAX #endif # ifdef LONGLONGSIZE #ifndef IVSIZE # define IVSIZE LONGLONGSIZE #endif # endif # else #ifndef IVTYPE # define IVTYPE long #endif #ifndef IV_MIN # define IV_MIN PERL_LONG_MIN #endif #ifndef IV_MAX # define IV_MAX PERL_LONG_MAX #endif #ifndef UV_MIN # define UV_MIN PERL_ULONG_MIN #endif #ifndef UV_MAX # define UV_MAX PERL_ULONG_MAX #endif # ifdef LONGSIZE #ifndef IVSIZE # define IVSIZE LONGSIZE #endif # endif # endif # endif #ifndef IVSIZE # define IVSIZE 8 #endif #ifndef PERL_QUAD_MIN # define PERL_QUAD_MIN IV_MIN #endif #ifndef PERL_QUAD_MAX # define PERL_QUAD_MAX IV_MAX #endif #ifndef PERL_UQUAD_MIN # define PERL_UQUAD_MIN UV_MIN #endif #ifndef PERL_UQUAD_MAX # define PERL_UQUAD_MAX UV_MAX #endif #else #ifndef IVTYPE # define IVTYPE long #endif #ifndef IV_MIN # define IV_MIN PERL_LONG_MIN #endif #ifndef IV_MAX # define IV_MAX PERL_LONG_MAX #endif #ifndef UV_MIN # define UV_MIN PERL_ULONG_MIN #endif #ifndef UV_MAX # define UV_MAX PERL_ULONG_MAX #endif #endif #ifndef IVSIZE # ifdef LONGSIZE # define IVSIZE LONGSIZE # else # define IVSIZE 4 /* A bold guess, but the best we can make. */ # endif #endif #ifndef UVTYPE # define UVTYPE unsigned IVTYPE #endif #ifndef UVSIZE # define UVSIZE IVSIZE #endif #ifndef sv_setuv # define sv_setuv(sv, uv) \ STMT_START { \ UV TeMpUv = uv; \ if (TeMpUv <= IV_MAX) \ sv_setiv(sv, TeMpUv); \ else \ sv_setnv(sv, (double)TeMpUv); \ } STMT_END #endif #ifndef newSVuv # define newSVuv(uv) ((uv) <= IV_MAX ? newSViv((IV)uv) : newSVnv((NV)uv)) #endif #ifndef sv_2uv # define sv_2uv(sv) ((PL_Sv = (sv)), (UV) (SvNOK(PL_Sv) ? SvNV(PL_Sv) : sv_2nv(PL_Sv))) #endif #ifndef SvUVX # define SvUVX(sv) ((UV)SvIVX(sv)) #endif #ifndef SvUVXx # define SvUVXx(sv) SvUVX(sv) #endif #ifndef SvUV # define SvUV(sv) (SvIOK(sv) ? SvUVX(sv) : sv_2uv(sv)) #endif #ifndef SvUVx # define SvUVx(sv) ((PL_Sv = (sv)), SvUV(PL_Sv)) #endif /* Hint: sv_uv * Always use the SvUVx() macro instead of sv_uv(). */ #ifndef sv_uv # define sv_uv(sv) SvUVx(sv) #endif #if !defined(SvUOK) && defined(SvIOK_UV) # define SvUOK(sv) SvIOK_UV(sv) #endif #ifndef XST_mUV # define XST_mUV(i,v) (ST(i) = sv_2mortal(newSVuv(v)) ) #endif #ifndef XSRETURN_UV # define XSRETURN_UV(v) STMT_START { XST_mUV(0,v); XSRETURN(1); } STMT_END #endif #ifndef PUSHu # define PUSHu(u) STMT_START { sv_setuv(TARG, (UV)(u)); PUSHTARG; } STMT_END #endif #ifndef XPUSHu # define XPUSHu(u) STMT_START { sv_setuv(TARG, (UV)(u)); XPUSHTARG; } STMT_END #endif #ifdef HAS_MEMCMP #ifndef memNE # define memNE(s1,s2,l) (memcmp(s1,s2,l)) #endif #ifndef memEQ # define memEQ(s1,s2,l) (!memcmp(s1,s2,l)) #endif #else #ifndef memNE # define memNE(s1,s2,l) (bcmp(s1,s2,l)) #endif #ifndef memEQ # define memEQ(s1,s2,l) (!bcmp(s1,s2,l)) #endif #endif #ifndef MoveD # define MoveD(s,d,n,t) memmove((char*)(d),(char*)(s), (n) * sizeof(t)) #endif #ifndef CopyD # define CopyD(s,d,n,t) memcpy((char*)(d),(char*)(s), (n) * sizeof(t)) #endif #ifdef HAS_MEMSET #ifndef ZeroD # define ZeroD(d,n,t) memzero((char*)(d), (n) * sizeof(t)) #endif #else #ifndef ZeroD # define ZeroD(d,n,t) ((void)memzero((char*)(d), (n) * sizeof(t)), d) #endif #endif #ifndef PoisonWith # define PoisonWith(d,n,t,b) (void)memset((char*)(d), (U8)(b), (n) * sizeof(t)) #endif #ifndef PoisonNew # define PoisonNew(d,n,t) PoisonWith(d,n,t,0xAB) #endif #ifndef PoisonFree # define PoisonFree(d,n,t) PoisonWith(d,n,t,0xEF) #endif #ifndef Poison # define Poison(d,n,t) PoisonFree(d,n,t) #endif #ifndef Newx # define Newx(v,n,t) New(0,v,n,t) #endif #ifndef Newxc # define Newxc(v,n,t,c) Newc(0,v,n,t,c) #endif #ifndef Newxz # define Newxz(v,n,t) Newz(0,v,n,t) #endif #ifndef PERL_UNUSED_DECL # ifdef HASATTRIBUTE # if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER) # define PERL_UNUSED_DECL # else # define PERL_UNUSED_DECL __attribute__((unused)) # endif # else # define PERL_UNUSED_DECL # endif #endif #ifndef PERL_UNUSED_ARG # if defined(lint) && defined(S_SPLINT_S) /* www.splint.org */ # include # define PERL_UNUSED_ARG(x) NOTE(ARGUNUSED(x)) # else # define PERL_UNUSED_ARG(x) ((void)x) # endif #endif #ifndef PERL_UNUSED_VAR # define PERL_UNUSED_VAR(x) ((void)x) #endif #ifndef PERL_UNUSED_CONTEXT # ifdef USE_ITHREADS # define PERL_UNUSED_CONTEXT PERL_UNUSED_ARG(my_perl) # else # define PERL_UNUSED_CONTEXT # endif #endif #ifndef NOOP # define NOOP /*EMPTY*/(void)0 #endif #ifndef dNOOP # define dNOOP extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL #endif #ifndef NVTYPE # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) # define NVTYPE long double # else # define NVTYPE double # endif typedef NVTYPE NV; #endif #ifndef INT2PTR # if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE) # define PTRV UV # define INT2PTR(any,d) (any)(d) # else # if PTRSIZE == LONGSIZE # define PTRV unsigned long # else # define PTRV unsigned # endif # define INT2PTR(any,d) (any)(PTRV)(d) # endif #endif #ifndef PTR2ul # if PTRSIZE == LONGSIZE # define PTR2ul(p) (unsigned long)(p) # else # define PTR2ul(p) INT2PTR(unsigned long,p) # endif #endif #ifndef PTR2nat # define PTR2nat(p) (PTRV)(p) #endif #ifndef NUM2PTR # define NUM2PTR(any,d) (any)PTR2nat(d) #endif #ifndef PTR2IV # define PTR2IV(p) INT2PTR(IV,p) #endif #ifndef PTR2UV # define PTR2UV(p) INT2PTR(UV,p) #endif #ifndef PTR2NV # define PTR2NV(p) NUM2PTR(NV,p) #endif #undef START_EXTERN_C #undef END_EXTERN_C #undef EXTERN_C #ifdef __cplusplus # define START_EXTERN_C extern "C" { # define END_EXTERN_C } # define EXTERN_C extern "C" #else # define START_EXTERN_C # define END_EXTERN_C # define EXTERN_C extern #endif #if defined(PERL_GCC_PEDANTIC) # ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN # define PERL_GCC_BRACE_GROUPS_FORBIDDEN # endif #endif #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) && !defined(__cplusplus) # ifndef PERL_USE_GCC_BRACE_GROUPS # define PERL_USE_GCC_BRACE_GROUPS # endif #endif #undef STMT_START #undef STMT_END #ifdef PERL_USE_GCC_BRACE_GROUPS # define STMT_START (void)( /* gcc supports ``({ STATEMENTS; })'' */ # define STMT_END ) #else # if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__) # define STMT_START if (1) # define STMT_END else (void)0 # else # define STMT_START do # define STMT_END while (0) # endif #endif #ifndef boolSV # define boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no) #endif /* DEFSV appears first in 5.004_56 */ #ifndef DEFSV # define DEFSV GvSV(PL_defgv) #endif #ifndef SAVE_DEFSV # define SAVE_DEFSV SAVESPTR(GvSV(PL_defgv)) #endif #ifndef DEFSV_set # define DEFSV_set(sv) (DEFSV = (sv)) #endif /* Older perls (<=5.003) lack AvFILLp */ #ifndef AvFILLp # define AvFILLp AvFILL #endif #ifndef ERRSV # define ERRSV get_sv("@",FALSE) #endif /* Hint: gv_stashpvn * This function's backport doesn't support the length parameter, but * rather ignores it. Portability can only be ensured if the length * parameter is used for speed reasons, but the length can always be * correctly computed from the string argument. */ #ifndef gv_stashpvn # define gv_stashpvn(str,len,create) gv_stashpv(str,create) #endif /* Replace: 1 */ #ifndef get_cv # define get_cv perl_get_cv #endif #ifndef get_sv # define get_sv perl_get_sv #endif #ifndef get_av # define get_av perl_get_av #endif #ifndef get_hv # define get_hv perl_get_hv #endif /* Replace: 0 */ #ifndef dUNDERBAR # define dUNDERBAR dNOOP #endif #ifndef UNDERBAR # define UNDERBAR DEFSV #endif #ifndef dAX # define dAX I32 ax = MARK - PL_stack_base + 1 #endif #ifndef dITEMS # define dITEMS I32 items = SP - MARK #endif #ifndef dXSTARG # define dXSTARG SV * targ = sv_newmortal() #endif #ifndef dAXMARK # define dAXMARK I32 ax = POPMARK; \ register SV ** const mark = PL_stack_base + ax++ #endif #ifndef XSprePUSH # define XSprePUSH (sp = PL_stack_base + ax - 1) #endif #if (PERL_BCDVERSION < 0x5005000) # undef XSRETURN # define XSRETURN(off) \ STMT_START { \ PL_stack_sp = PL_stack_base + ax + ((off) - 1); \ return; \ } STMT_END #endif #ifndef XSPROTO # define XSPROTO(name) void name(pTHX_ CV* cv) #endif #ifndef SVfARG # define SVfARG(p) ((void*)(p)) #endif #ifndef PERL_ABS # define PERL_ABS(x) ((x) < 0 ? -(x) : (x)) #endif #ifndef dVAR # define dVAR dNOOP #endif #ifndef SVf # define SVf "_" #endif #ifndef UTF8_MAXBYTES # define UTF8_MAXBYTES UTF8_MAXLEN #endif #ifndef CPERLscope # define CPERLscope(x) x #endif #ifndef PERL_HASH # define PERL_HASH(hash,str,len) \ STMT_START { \ const char *s_PeRlHaSh = str; \ I32 i_PeRlHaSh = len; \ U32 hash_PeRlHaSh = 0; \ while (i_PeRlHaSh--) \ hash_PeRlHaSh = hash_PeRlHaSh * 33 + *s_PeRlHaSh++; \ (hash) = hash_PeRlHaSh; \ } STMT_END #endif #ifndef PERLIO_FUNCS_DECL # ifdef PERLIO_FUNCS_CONST # define PERLIO_FUNCS_DECL(funcs) const PerlIO_funcs funcs # define PERLIO_FUNCS_CAST(funcs) (PerlIO_funcs*)(funcs) # else # define PERLIO_FUNCS_DECL(funcs) PerlIO_funcs funcs # define PERLIO_FUNCS_CAST(funcs) (funcs) # endif #endif /* provide these typedefs for older perls */ #if (PERL_BCDVERSION < 0x5009003) # ifdef ARGSproto typedef OP* (CPERLscope(*Perl_ppaddr_t))(ARGSproto); # else typedef OP* (CPERLscope(*Perl_ppaddr_t))(pTHX); # endif typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*); #endif #ifndef isPSXSPC # define isPSXSPC(c) (isSPACE(c) || (c) == '\v') #endif #ifndef isBLANK # define isBLANK(c) ((c) == ' ' || (c) == '\t') #endif #ifdef EBCDIC #ifndef isALNUMC # define isALNUMC(c) isalnum(c) #endif #ifndef isASCII # define isASCII(c) isascii(c) #endif #ifndef isCNTRL # define isCNTRL(c) iscntrl(c) #endif #ifndef isGRAPH # define isGRAPH(c) isgraph(c) #endif #ifndef isPRINT # define isPRINT(c) isprint(c) #endif #ifndef isPUNCT # define isPUNCT(c) ispunct(c) #endif #ifndef isXDIGIT # define isXDIGIT(c) isxdigit(c) #endif #else # if (PERL_BCDVERSION < 0x5010000) /* Hint: isPRINT * The implementation in older perl versions includes all of the * isSPACE() characters, which is wrong. The version provided by * Devel::PPPort always overrides a present buggy version. */ # undef isPRINT # endif #ifndef isALNUMC # define isALNUMC(c) (isALPHA(c) || isDIGIT(c)) #endif #ifndef isASCII # define isASCII(c) ((c) <= 127) #endif #ifndef isCNTRL # define isCNTRL(c) ((c) < ' ' || (c) == 127) #endif #ifndef isGRAPH # define isGRAPH(c) (isALNUM(c) || isPUNCT(c)) #endif #ifndef isPRINT # define isPRINT(c) (((c) >= 32 && (c) < 127)) #endif #ifndef isPUNCT # define isPUNCT(c) (((c) >= 33 && (c) <= 47) || ((c) >= 58 && (c) <= 64) || ((c) >= 91 && (c) <= 96) || ((c) >= 123 && (c) <= 126)) #endif #ifndef isXDIGIT # define isXDIGIT(c) (isDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F')) #endif #endif #ifndef PERL_SIGNALS_UNSAFE_FLAG #define PERL_SIGNALS_UNSAFE_FLAG 0x0001 #if (PERL_BCDVERSION < 0x5008000) # define D_PPP_PERL_SIGNALS_INIT PERL_SIGNALS_UNSAFE_FLAG #else # define D_PPP_PERL_SIGNALS_INIT 0 #endif #if defined(NEED_PL_signals) static U32 DPPP_(my_PL_signals) = D_PPP_PERL_SIGNALS_INIT; #elif defined(NEED_PL_signals_GLOBAL) U32 DPPP_(my_PL_signals) = D_PPP_PERL_SIGNALS_INIT; #else extern U32 DPPP_(my_PL_signals); #endif #define PL_signals DPPP_(my_PL_signals) #endif /* Hint: PL_ppaddr * Calling an op via PL_ppaddr requires passing a context argument * for threaded builds. Since the context argument is different for * 5.005 perls, you can use aTHXR (supplied by ppport.h), which will * automatically be defined as the correct argument. */ #if (PERL_BCDVERSION <= 0x5005005) /* Replace: 1 */ # define PL_ppaddr ppaddr # define PL_no_modify no_modify /* Replace: 0 */ #endif #if (PERL_BCDVERSION <= 0x5004005) /* Replace: 1 */ # define PL_DBsignal DBsignal # define PL_DBsingle DBsingle # define PL_DBsub DBsub # define PL_DBtrace DBtrace # define PL_Sv Sv # define PL_bufend bufend # define PL_bufptr bufptr # define PL_compiling compiling # define PL_copline copline # define PL_curcop curcop # define PL_curstash curstash # define PL_debstash debstash # define PL_defgv defgv # define PL_diehook diehook # define PL_dirty dirty # define PL_dowarn dowarn # define PL_errgv errgv # define PL_error_count error_count # define PL_expect expect # define PL_hexdigit hexdigit # define PL_hints hints # define PL_in_my in_my # define PL_laststatval laststatval # define PL_lex_state lex_state # define PL_lex_stuff lex_stuff # define PL_linestr linestr # define PL_na na # define PL_perl_destruct_level perl_destruct_level # define PL_perldb perldb # define PL_rsfp_filters rsfp_filters # define PL_rsfp rsfp # define PL_stack_base stack_base # define PL_stack_sp stack_sp # define PL_statcache statcache # define PL_stdingv stdingv # define PL_sv_arenaroot sv_arenaroot # define PL_sv_no sv_no # define PL_sv_undef sv_undef # define PL_sv_yes sv_yes # define PL_tainted tainted # define PL_tainting tainting # define PL_tokenbuf tokenbuf /* Replace: 0 */ #endif /* Warning: PL_parser * For perl versions earlier than 5.9.5, this is an always * non-NULL dummy. Also, it cannot be dereferenced. Don't * use it if you can avoid is and unless you absolutely know * what you're doing. * If you always check that PL_parser is non-NULL, you can * define DPPP_PL_parser_NO_DUMMY to avoid the creation of * a dummy parser structure. */ #if (PERL_BCDVERSION >= 0x5009005) # ifdef DPPP_PL_parser_NO_DUMMY # define D_PPP_my_PL_parser_var(var) ((PL_parser ? PL_parser : \ (croak("panic: PL_parser == NULL in %s:%d", \ __FILE__, __LINE__), (yy_parser *) NULL))->var) # else # ifdef DPPP_PL_parser_NO_DUMMY_WARNING # define D_PPP_parser_dummy_warning(var) # else # define D_PPP_parser_dummy_warning(var) \ warn("warning: dummy PL_" #var " used in %s:%d", __FILE__, __LINE__), # endif # define D_PPP_my_PL_parser_var(var) ((PL_parser ? PL_parser : \ (D_PPP_parser_dummy_warning(var) &DPPP_(dummy_PL_parser)))->var) #if defined(NEED_PL_parser) static yy_parser DPPP_(dummy_PL_parser); #elif defined(NEED_PL_parser_GLOBAL) yy_parser DPPP_(dummy_PL_parser); #else extern yy_parser DPPP_(dummy_PL_parser); #endif # endif /* PL_expect, PL_copline, PL_rsfp, PL_rsfp_filters, PL_linestr, PL_bufptr, PL_bufend, PL_lex_state, PL_lex_stuff, PL_tokenbuf depends on PL_parser */ /* Warning: PL_expect, PL_copline, PL_rsfp, PL_rsfp_filters, PL_linestr, PL_bufptr, PL_bufend, PL_lex_state, PL_lex_stuff, PL_tokenbuf * Do not use this variable unless you know exactly what you're * doint. It is internal to the perl parser and may change or even * be removed in the future. As of perl 5.9.5, you have to check * for (PL_parser != NULL) for this variable to have any effect. * An always non-NULL PL_parser dummy is provided for earlier * perl versions. * If PL_parser is NULL when you try to access this variable, a * dummy is being accessed instead and a warning is issued unless * you define DPPP_PL_parser_NO_DUMMY_WARNING. * If DPPP_PL_parser_NO_DUMMY is defined, the code trying to access * this variable will croak with a panic message. */ # define PL_expect D_PPP_my_PL_parser_var(expect) # define PL_copline D_PPP_my_PL_parser_var(copline) # define PL_rsfp D_PPP_my_PL_parser_var(rsfp) # define PL_rsfp_filters D_PPP_my_PL_parser_var(rsfp_filters) # define PL_linestr D_PPP_my_PL_parser_var(linestr) # define PL_bufptr D_PPP_my_PL_parser_var(bufptr) # define PL_bufend D_PPP_my_PL_parser_var(bufend) # define PL_lex_state D_PPP_my_PL_parser_var(lex_state) # define PL_lex_stuff D_PPP_my_PL_parser_var(lex_stuff) # define PL_tokenbuf D_PPP_my_PL_parser_var(tokenbuf) # define PL_in_my D_PPP_my_PL_parser_var(in_my) # define PL_in_my_stash D_PPP_my_PL_parser_var(in_my_stash) # define PL_error_count D_PPP_my_PL_parser_var(error_count) #else /* ensure that PL_parser != NULL and cannot be dereferenced */ # define PL_parser ((void *) 1) #endif #ifndef mPUSHs # define mPUSHs(s) PUSHs(sv_2mortal(s)) #endif #ifndef PUSHmortal # define PUSHmortal PUSHs(sv_newmortal()) #endif #ifndef mPUSHp # define mPUSHp(p,l) sv_setpvn(PUSHmortal, (p), (l)) #endif #ifndef mPUSHn # define mPUSHn(n) sv_setnv(PUSHmortal, (NV)(n)) #endif #ifndef mPUSHi # define mPUSHi(i) sv_setiv(PUSHmortal, (IV)(i)) #endif #ifndef mPUSHu # define mPUSHu(u) sv_setuv(PUSHmortal, (UV)(u)) #endif #ifndef mXPUSHs # define mXPUSHs(s) XPUSHs(sv_2mortal(s)) #endif #ifndef XPUSHmortal # define XPUSHmortal XPUSHs(sv_newmortal()) #endif #ifndef mXPUSHp # define mXPUSHp(p,l) STMT_START { EXTEND(sp,1); sv_setpvn(PUSHmortal, (p), (l)); } STMT_END #endif #ifndef mXPUSHn # define mXPUSHn(n) STMT_START { EXTEND(sp,1); sv_setnv(PUSHmortal, (NV)(n)); } STMT_END #endif #ifndef mXPUSHi # define mXPUSHi(i) STMT_START { EXTEND(sp,1); sv_setiv(PUSHmortal, (IV)(i)); } STMT_END #endif #ifndef mXPUSHu # define mXPUSHu(u) STMT_START { EXTEND(sp,1); sv_setuv(PUSHmortal, (UV)(u)); } STMT_END #endif /* Replace: 1 */ #ifndef call_sv # define call_sv perl_call_sv #endif #ifndef call_pv # define call_pv perl_call_pv #endif #ifndef call_argv # define call_argv perl_call_argv #endif #ifndef call_method # define call_method perl_call_method #endif #ifndef eval_sv # define eval_sv perl_eval_sv #endif /* Replace: 0 */ #ifndef PERL_LOADMOD_DENY # define PERL_LOADMOD_DENY 0x1 #endif #ifndef PERL_LOADMOD_NOIMPORT # define PERL_LOADMOD_NOIMPORT 0x2 #endif #ifndef PERL_LOADMOD_IMPORT_OPS # define PERL_LOADMOD_IMPORT_OPS 0x4 #endif #ifndef G_METHOD # define G_METHOD 64 # ifdef call_sv # undef call_sv # endif # if (PERL_BCDVERSION < 0x5006000) # define call_sv(sv, flags) ((flags) & G_METHOD ? perl_call_method((char *) SvPV_nolen_const(sv), \ (flags) & ~G_METHOD) : perl_call_sv(sv, flags)) # else # define call_sv(sv, flags) ((flags) & G_METHOD ? Perl_call_method(aTHX_ (char *) SvPV_nolen_const(sv), \ (flags) & ~G_METHOD) : Perl_call_sv(aTHX_ sv, flags)) # endif #endif /* Replace perl_eval_pv with eval_pv */ #ifndef eval_pv #if defined(NEED_eval_pv) static SV* DPPP_(my_eval_pv)(char *p, I32 croak_on_error); static #else extern SV* DPPP_(my_eval_pv)(char *p, I32 croak_on_error); #endif #ifdef eval_pv # undef eval_pv #endif #define eval_pv(a,b) DPPP_(my_eval_pv)(aTHX_ a,b) #define Perl_eval_pv DPPP_(my_eval_pv) #if defined(NEED_eval_pv) || defined(NEED_eval_pv_GLOBAL) SV* DPPP_(my_eval_pv)(char *p, I32 croak_on_error) { dSP; SV* sv = newSVpv(p, 0); PUSHMARK(sp); eval_sv(sv, G_SCALAR); SvREFCNT_dec(sv); SPAGAIN; sv = POPs; PUTBACK; if (croak_on_error && SvTRUE(GvSV(errgv))) croak(SvPVx(GvSV(errgv), na)); return sv; } #endif #endif #ifndef vload_module #if defined(NEED_vload_module) static void DPPP_(my_vload_module)(U32 flags, SV *name, SV *ver, va_list *args); static #else extern void DPPP_(my_vload_module)(U32 flags, SV *name, SV *ver, va_list *args); #endif #ifdef vload_module # undef vload_module #endif #define vload_module(a,b,c,d) DPPP_(my_vload_module)(aTHX_ a,b,c,d) #define Perl_vload_module DPPP_(my_vload_module) #if defined(NEED_vload_module) || defined(NEED_vload_module_GLOBAL) void DPPP_(my_vload_module)(U32 flags, SV *name, SV *ver, va_list *args) { dTHR; dVAR; OP *veop, *imop; OP * const modname = newSVOP(OP_CONST, 0, name); /* 5.005 has a somewhat hacky force_normal that doesn't croak on SvREADONLY() if PL_compling is true. Current perls take care in ck_require() to correctly turn off SvREADONLY before calling force_normal_flags(). This seems a better fix than fudging PL_compling */ SvREADONLY_off(((SVOP*)modname)->op_sv); modname->op_private |= OPpCONST_BARE; if (ver) { veop = newSVOP(OP_CONST, 0, ver); } else veop = NULL; if (flags & PERL_LOADMOD_NOIMPORT) { imop = sawparens(newNULLLIST()); } else if (flags & PERL_LOADMOD_IMPORT_OPS) { imop = va_arg(*args, OP*); } else { SV *sv; imop = NULL; sv = va_arg(*args, SV*); while (sv) { imop = append_elem(OP_LIST, imop, newSVOP(OP_CONST, 0, sv)); sv = va_arg(*args, SV*); } } { const line_t ocopline = PL_copline; COP * const ocurcop = PL_curcop; const int oexpect = PL_expect; #if (PERL_BCDVERSION >= 0x5004000) utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(FALSE, 0), veop, modname, imop); #else utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(), modname, imop); #endif PL_expect = oexpect; PL_copline = ocopline; PL_curcop = ocurcop; } } #endif #endif #ifndef load_module #if defined(NEED_load_module) static void DPPP_(my_load_module)(U32 flags, SV *name, SV *ver, ...); static #else extern void DPPP_(my_load_module)(U32 flags, SV *name, SV *ver, ...); #endif #ifdef load_module # undef load_module #endif #define load_module DPPP_(my_load_module) #define Perl_load_module DPPP_(my_load_module) #if defined(NEED_load_module) || defined(NEED_load_module_GLOBAL) void DPPP_(my_load_module)(U32 flags, SV *name, SV *ver, ...) { va_list args; va_start(args, ver); vload_module(flags, name, ver, &args); va_end(args); } #endif #endif #ifndef newRV_inc # define newRV_inc(sv) newRV(sv) /* Replace */ #endif #ifndef newRV_noinc #if defined(NEED_newRV_noinc) static SV * DPPP_(my_newRV_noinc)(SV *sv); static #else extern SV * DPPP_(my_newRV_noinc)(SV *sv); #endif #ifdef newRV_noinc # undef newRV_noinc #endif #define newRV_noinc(a) DPPP_(my_newRV_noinc)(aTHX_ a) #define Perl_newRV_noinc DPPP_(my_newRV_noinc) #if defined(NEED_newRV_noinc) || defined(NEED_newRV_noinc_GLOBAL) SV * DPPP_(my_newRV_noinc)(SV *sv) { SV *rv = (SV *)newRV(sv); SvREFCNT_dec(sv); return rv; } #endif #endif /* Hint: newCONSTSUB * Returns a CV* as of perl-5.7.1. This return value is not supported * by Devel::PPPort. */ /* newCONSTSUB from IO.xs is in the core starting with 5.004_63 */ #if (PERL_BCDVERSION < 0x5004063) && (PERL_BCDVERSION != 0x5004005) #if defined(NEED_newCONSTSUB) static void DPPP_(my_newCONSTSUB)(HV *stash, const char *name, SV *sv); static #else extern void DPPP_(my_newCONSTSUB)(HV *stash, const char *name, SV *sv); #endif #ifdef newCONSTSUB # undef newCONSTSUB #endif #define newCONSTSUB(a,b,c) DPPP_(my_newCONSTSUB)(aTHX_ a,b,c) #define Perl_newCONSTSUB DPPP_(my_newCONSTSUB) #if defined(NEED_newCONSTSUB) || defined(NEED_newCONSTSUB_GLOBAL) /* This is just a trick to avoid a dependency of newCONSTSUB on PL_parser */ /* (There's no PL_parser in perl < 5.005, so this is completely safe) */ #define D_PPP_PL_copline PL_copline void DPPP_(my_newCONSTSUB)(HV *stash, const char *name, SV *sv) { U32 oldhints = PL_hints; HV *old_cop_stash = PL_curcop->cop_stash; HV *old_curstash = PL_curstash; line_t oldline = PL_curcop->cop_line; PL_curcop->cop_line = D_PPP_PL_copline; PL_hints &= ~HINT_BLOCK_SCOPE; if (stash) PL_curstash = PL_curcop->cop_stash = stash; newSUB( #if (PERL_BCDVERSION < 0x5003022) start_subparse(), #elif (PERL_BCDVERSION == 0x5003022) start_subparse(0), #else /* 5.003_23 onwards */ start_subparse(FALSE, 0), #endif newSVOP(OP_CONST, 0, newSVpv((char *) name, 0)), newSVOP(OP_CONST, 0, &PL_sv_no), /* SvPV(&PL_sv_no) == "" -- GMB */ newSTATEOP(0, Nullch, newSVOP(OP_CONST, 0, sv)) ); PL_hints = oldhints; PL_curcop->cop_stash = old_cop_stash; PL_curstash = old_curstash; PL_curcop->cop_line = oldline; } #endif #endif /* * Boilerplate macros for initializing and accessing interpreter-local * data from C. All statics in extensions should be reworked to use * this, if you want to make the extension thread-safe. See ext/re/re.xs * for an example of the use of these macros. * * Code that uses these macros is responsible for the following: * 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts" * 2. Declare a typedef named my_cxt_t that is a structure that contains * all the data that needs to be interpreter-local. * 3. Use the START_MY_CXT macro after the declaration of my_cxt_t. * 4. Use the MY_CXT_INIT macro such that it is called exactly once * (typically put in the BOOT: section). * 5. Use the members of the my_cxt_t structure everywhere as * MY_CXT.member. * 6. Use the dMY_CXT macro (a declaration) in all the functions that * access MY_CXT. */ #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || \ defined(PERL_CAPI) || defined(PERL_IMPLICIT_CONTEXT) #ifndef START_MY_CXT /* This must appear in all extensions that define a my_cxt_t structure, * right after the definition (i.e. at file scope). The non-threads * case below uses it to declare the data as static. */ #define START_MY_CXT #if (PERL_BCDVERSION < 0x5004068) /* Fetches the SV that keeps the per-interpreter data. */ #define dMY_CXT_SV \ SV *my_cxt_sv = get_sv(MY_CXT_KEY, FALSE) #else /* >= perl5.004_68 */ #define dMY_CXT_SV \ SV *my_cxt_sv = *hv_fetch(PL_modglobal, MY_CXT_KEY, \ sizeof(MY_CXT_KEY)-1, TRUE) #endif /* < perl5.004_68 */ /* This declaration should be used within all functions that use the * interpreter-local data. */ #define dMY_CXT \ dMY_CXT_SV; \ my_cxt_t *my_cxtp = INT2PTR(my_cxt_t*,SvUV(my_cxt_sv)) /* Creates and zeroes the per-interpreter data. * (We allocate my_cxtp in a Perl SV so that it will be released when * the interpreter goes away.) */ #define MY_CXT_INIT \ dMY_CXT_SV; \ /* newSV() allocates one more than needed */ \ my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\ Zero(my_cxtp, 1, my_cxt_t); \ sv_setuv(my_cxt_sv, PTR2UV(my_cxtp)) /* This macro must be used to access members of the my_cxt_t structure. * e.g. MYCXT.some_data */ #define MY_CXT (*my_cxtp) /* Judicious use of these macros can reduce the number of times dMY_CXT * is used. Use is similar to pTHX, aTHX etc. */ #define pMY_CXT my_cxt_t *my_cxtp #define pMY_CXT_ pMY_CXT, #define _pMY_CXT ,pMY_CXT #define aMY_CXT my_cxtp #define aMY_CXT_ aMY_CXT, #define _aMY_CXT ,aMY_CXT #endif /* START_MY_CXT */ #ifndef MY_CXT_CLONE /* Clones the per-interpreter data. */ #define MY_CXT_CLONE \ dMY_CXT_SV; \ my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\ Copy(INT2PTR(my_cxt_t*, SvUV(my_cxt_sv)), my_cxtp, 1, my_cxt_t);\ sv_setuv(my_cxt_sv, PTR2UV(my_cxtp)) #endif #else /* single interpreter */ #ifndef START_MY_CXT #define START_MY_CXT static my_cxt_t my_cxt; #define dMY_CXT_SV dNOOP #define dMY_CXT dNOOP #define MY_CXT_INIT NOOP #define MY_CXT my_cxt #define pMY_CXT void #define pMY_CXT_ #define _pMY_CXT #define aMY_CXT #define aMY_CXT_ #define _aMY_CXT #endif /* START_MY_CXT */ #ifndef MY_CXT_CLONE #define MY_CXT_CLONE NOOP #endif #endif #ifndef IVdf # if IVSIZE == LONGSIZE # define IVdf "ld" # define UVuf "lu" # define UVof "lo" # define UVxf "lx" # define UVXf "lX" # else # if IVSIZE == INTSIZE # define IVdf "d" # define UVuf "u" # define UVof "o" # define UVxf "x" # define UVXf "X" # endif # endif #endif #ifndef NVef # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) && \ defined(PERL_PRIfldbl) && (PERL_BCDVERSION != 0x5006000) /* Not very likely, but let's try anyway. */ # define NVef PERL_PRIeldbl # define NVff PERL_PRIfldbl # define NVgf PERL_PRIgldbl # else # define NVef "e" # define NVff "f" # define NVgf "g" # endif #endif #ifndef SvREFCNT_inc # ifdef PERL_USE_GCC_BRACE_GROUPS # define SvREFCNT_inc(sv) \ ({ \ SV * const _sv = (SV*)(sv); \ if (_sv) \ (SvREFCNT(_sv))++; \ _sv; \ }) # else # define SvREFCNT_inc(sv) \ ((PL_Sv=(SV*)(sv)) ? (++(SvREFCNT(PL_Sv)),PL_Sv) : NULL) # endif #endif #ifndef SvREFCNT_inc_simple # ifdef PERL_USE_GCC_BRACE_GROUPS # define SvREFCNT_inc_simple(sv) \ ({ \ if (sv) \ (SvREFCNT(sv))++; \ (SV *)(sv); \ }) # else # define SvREFCNT_inc_simple(sv) \ ((sv) ? (SvREFCNT(sv)++,(SV*)(sv)) : NULL) # endif #endif #ifndef SvREFCNT_inc_NN # ifdef PERL_USE_GCC_BRACE_GROUPS # define SvREFCNT_inc_NN(sv) \ ({ \ SV * const _sv = (SV*)(sv); \ SvREFCNT(_sv)++; \ _sv; \ }) # else # define SvREFCNT_inc_NN(sv) \ (PL_Sv=(SV*)(sv),++(SvREFCNT(PL_Sv)),PL_Sv) # endif #endif #ifndef SvREFCNT_inc_void # ifdef PERL_USE_GCC_BRACE_GROUPS # define SvREFCNT_inc_void(sv) \ ({ \ SV * const _sv = (SV*)(sv); \ if (_sv) \ (void)(SvREFCNT(_sv)++); \ }) # else # define SvREFCNT_inc_void(sv) \ (void)((PL_Sv=(SV*)(sv)) ? ++(SvREFCNT(PL_Sv)) : 0) # endif #endif #ifndef SvREFCNT_inc_simple_void # define SvREFCNT_inc_simple_void(sv) STMT_START { if (sv) SvREFCNT(sv)++; } STMT_END #endif #ifndef SvREFCNT_inc_simple_NN # define SvREFCNT_inc_simple_NN(sv) (++SvREFCNT(sv), (SV*)(sv)) #endif #ifndef SvREFCNT_inc_void_NN # define SvREFCNT_inc_void_NN(sv) (void)(++SvREFCNT((SV*)(sv))) #endif #ifndef SvREFCNT_inc_simple_void_NN # define SvREFCNT_inc_simple_void_NN(sv) (void)(++SvREFCNT((SV*)(sv))) #endif #ifndef newSV_type #if defined(NEED_newSV_type) static SV* DPPP_(my_newSV_type)(pTHX_ svtype const t); static #else extern SV* DPPP_(my_newSV_type)(pTHX_ svtype const t); #endif #ifdef newSV_type # undef newSV_type #endif #define newSV_type(a) DPPP_(my_newSV_type)(aTHX_ a) #define Perl_newSV_type DPPP_(my_newSV_type) #if defined(NEED_newSV_type) || defined(NEED_newSV_type_GLOBAL) SV* DPPP_(my_newSV_type)(pTHX_ svtype const t) { SV* const sv = newSV(0); sv_upgrade(sv, t); return sv; } #endif #endif #if (PERL_BCDVERSION < 0x5006000) # define D_PPP_CONSTPV_ARG(x) ((char *) (x)) #else # define D_PPP_CONSTPV_ARG(x) (x) #endif #ifndef newSVpvn # define newSVpvn(data,len) ((data) \ ? ((len) ? newSVpv((data), (len)) : newSVpv("", 0)) \ : newSV(0)) #endif #ifndef newSVpvn_utf8 # define newSVpvn_utf8(s, len, u) newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0) #endif #ifndef SVf_UTF8 # define SVf_UTF8 0 #endif #ifndef newSVpvn_flags #if defined(NEED_newSVpvn_flags) static SV * DPPP_(my_newSVpvn_flags)(pTHX_ const char *s, STRLEN len, U32 flags); static #else extern SV * DPPP_(my_newSVpvn_flags)(pTHX_ const char *s, STRLEN len, U32 flags); #endif #ifdef newSVpvn_flags # undef newSVpvn_flags #endif #define newSVpvn_flags(a,b,c) DPPP_(my_newSVpvn_flags)(aTHX_ a,b,c) #define Perl_newSVpvn_flags DPPP_(my_newSVpvn_flags) #if defined(NEED_newSVpvn_flags) || defined(NEED_newSVpvn_flags_GLOBAL) SV * DPPP_(my_newSVpvn_flags)(pTHX_ const char *s, STRLEN len, U32 flags) { SV *sv = newSVpvn(D_PPP_CONSTPV_ARG(s), len); SvFLAGS(sv) |= (flags & SVf_UTF8); return (flags & SVs_TEMP) ? sv_2mortal(sv) : sv; } #endif #endif /* Backwards compatibility stuff... :-( */ #if !defined(NEED_sv_2pv_flags) && defined(NEED_sv_2pv_nolen) # define NEED_sv_2pv_flags #endif #if !defined(NEED_sv_2pv_flags_GLOBAL) && defined(NEED_sv_2pv_nolen_GLOBAL) # define NEED_sv_2pv_flags_GLOBAL #endif /* Hint: sv_2pv_nolen * Use the SvPV_nolen() or SvPV_nolen_const() macros instead of sv_2pv_nolen(). */ #ifndef sv_2pv_nolen # define sv_2pv_nolen(sv) SvPV_nolen(sv) #endif #ifdef SvPVbyte /* Hint: SvPVbyte * Does not work in perl-5.6.1, ppport.h implements a version * borrowed from perl-5.7.3. */ #if (PERL_BCDVERSION < 0x5007000) #if defined(NEED_sv_2pvbyte) static char * DPPP_(my_sv_2pvbyte)(pTHX_ SV *sv, STRLEN *lp); static #else extern char * DPPP_(my_sv_2pvbyte)(pTHX_ SV *sv, STRLEN *lp); #endif #ifdef sv_2pvbyte # undef sv_2pvbyte #endif #define sv_2pvbyte(a,b) DPPP_(my_sv_2pvbyte)(aTHX_ a,b) #define Perl_sv_2pvbyte DPPP_(my_sv_2pvbyte) #if defined(NEED_sv_2pvbyte) || defined(NEED_sv_2pvbyte_GLOBAL) char * DPPP_(my_sv_2pvbyte)(pTHX_ SV *sv, STRLEN *lp) { sv_utf8_downgrade(sv,0); return SvPV(sv,*lp); } #endif /* Hint: sv_2pvbyte * Use the SvPVbyte() macro instead of sv_2pvbyte(). */ #undef SvPVbyte #define SvPVbyte(sv, lp) \ ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == (SVf_POK) \ ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_2pvbyte(sv, &lp)) #endif #else # define SvPVbyte SvPV # define sv_2pvbyte sv_2pv #endif #ifndef sv_2pvbyte_nolen # define sv_2pvbyte_nolen(sv) sv_2pv_nolen(sv) #endif /* Hint: sv_pvn * Always use the SvPV() macro instead of sv_pvn(). */ /* Hint: sv_pvn_force * Always use the SvPV_force() macro instead of sv_pvn_force(). */ /* If these are undefined, they're not handled by the core anyway */ #ifndef SV_IMMEDIATE_UNREF # define SV_IMMEDIATE_UNREF 0 #endif #ifndef SV_GMAGIC # define SV_GMAGIC 0 #endif #ifndef SV_COW_DROP_PV # define SV_COW_DROP_PV 0 #endif #ifndef SV_UTF8_NO_ENCODING # define SV_UTF8_NO_ENCODING 0 #endif #ifndef SV_NOSTEAL # define SV_NOSTEAL 0 #endif #ifndef SV_CONST_RETURN # define SV_CONST_RETURN 0 #endif #ifndef SV_MUTABLE_RETURN # define SV_MUTABLE_RETURN 0 #endif #ifndef SV_SMAGIC # define SV_SMAGIC 0 #endif #ifndef SV_HAS_TRAILING_NUL # define SV_HAS_TRAILING_NUL 0 #endif #ifndef SV_COW_SHARED_HASH_KEYS # define SV_COW_SHARED_HASH_KEYS 0 #endif #if (PERL_BCDVERSION < 0x5007002) #if defined(NEED_sv_2pv_flags) static char * DPPP_(my_sv_2pv_flags)(pTHX_ SV *sv, STRLEN *lp, I32 flags); static #else extern char * DPPP_(my_sv_2pv_flags)(pTHX_ SV *sv, STRLEN *lp, I32 flags); #endif #ifdef sv_2pv_flags # undef sv_2pv_flags #endif #define sv_2pv_flags(a,b,c) DPPP_(my_sv_2pv_flags)(aTHX_ a,b,c) #define Perl_sv_2pv_flags DPPP_(my_sv_2pv_flags) #if defined(NEED_sv_2pv_flags) || defined(NEED_sv_2pv_flags_GLOBAL) char * DPPP_(my_sv_2pv_flags)(pTHX_ SV *sv, STRLEN *lp, I32 flags) { STRLEN n_a = (STRLEN) flags; return sv_2pv(sv, lp ? lp : &n_a); } #endif #if defined(NEED_sv_pvn_force_flags) static char * DPPP_(my_sv_pvn_force_flags)(pTHX_ SV *sv, STRLEN *lp, I32 flags); static #else extern char * DPPP_(my_sv_pvn_force_flags)(pTHX_ SV *sv, STRLEN *lp, I32 flags); #endif #ifdef sv_pvn_force_flags # undef sv_pvn_force_flags #endif #define sv_pvn_force_flags(a,b,c) DPPP_(my_sv_pvn_force_flags)(aTHX_ a,b,c) #define Perl_sv_pvn_force_flags DPPP_(my_sv_pvn_force_flags) #if defined(NEED_sv_pvn_force_flags) || defined(NEED_sv_pvn_force_flags_GLOBAL) char * DPPP_(my_sv_pvn_force_flags)(pTHX_ SV *sv, STRLEN *lp, I32 flags) { STRLEN n_a = (STRLEN) flags; return sv_pvn_force(sv, lp ? lp : &n_a); } #endif #endif #if (PERL_BCDVERSION < 0x5008008) || ( (PERL_BCDVERSION >= 0x5009000) && (PERL_BCDVERSION < 0x5009003) ) # define DPPP_SVPV_NOLEN_LP_ARG &PL_na #else # define DPPP_SVPV_NOLEN_LP_ARG 0 #endif #ifndef SvPV_const # define SvPV_const(sv, lp) SvPV_flags_const(sv, lp, SV_GMAGIC) #endif #ifndef SvPV_mutable # define SvPV_mutable(sv, lp) SvPV_flags_mutable(sv, lp, SV_GMAGIC) #endif #ifndef SvPV_flags # define SvPV_flags(sv, lp, flags) \ ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_2pv_flags(sv, &lp, flags)) #endif #ifndef SvPV_flags_const # define SvPV_flags_const(sv, lp, flags) \ ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ ? ((lp = SvCUR(sv)), SvPVX_const(sv)) : \ (const char*) sv_2pv_flags(sv, &lp, flags|SV_CONST_RETURN)) #endif #ifndef SvPV_flags_const_nolen # define SvPV_flags_const_nolen(sv, flags) \ ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ ? SvPVX_const(sv) : \ (const char*) sv_2pv_flags(sv, DPPP_SVPV_NOLEN_LP_ARG, flags|SV_CONST_RETURN)) #endif #ifndef SvPV_flags_mutable # define SvPV_flags_mutable(sv, lp, flags) \ ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ ? ((lp = SvCUR(sv)), SvPVX_mutable(sv)) : \ sv_2pv_flags(sv, &lp, flags|SV_MUTABLE_RETURN)) #endif #ifndef SvPV_force # define SvPV_force(sv, lp) SvPV_force_flags(sv, lp, SV_GMAGIC) #endif #ifndef SvPV_force_nolen # define SvPV_force_nolen(sv) SvPV_force_flags_nolen(sv, SV_GMAGIC) #endif #ifndef SvPV_force_mutable # define SvPV_force_mutable(sv, lp) SvPV_force_flags_mutable(sv, lp, SV_GMAGIC) #endif #ifndef SvPV_force_nomg # define SvPV_force_nomg(sv, lp) SvPV_force_flags(sv, lp, 0) #endif #ifndef SvPV_force_nomg_nolen # define SvPV_force_nomg_nolen(sv) SvPV_force_flags_nolen(sv, 0) #endif #ifndef SvPV_force_flags # define SvPV_force_flags(sv, lp, flags) \ ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_pvn_force_flags(sv, &lp, flags)) #endif #ifndef SvPV_force_flags_nolen # define SvPV_force_flags_nolen(sv, flags) \ ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ ? SvPVX(sv) : sv_pvn_force_flags(sv, DPPP_SVPV_NOLEN_LP_ARG, flags)) #endif #ifndef SvPV_force_flags_mutable # define SvPV_force_flags_mutable(sv, lp, flags) \ ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ ? ((lp = SvCUR(sv)), SvPVX_mutable(sv)) \ : sv_pvn_force_flags(sv, &lp, flags|SV_MUTABLE_RETURN)) #endif #ifndef SvPV_nolen # define SvPV_nolen(sv) \ ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ ? SvPVX(sv) : sv_2pv_flags(sv, DPPP_SVPV_NOLEN_LP_ARG, SV_GMAGIC)) #endif #ifndef SvPV_nolen_const # define SvPV_nolen_const(sv) \ ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ ? SvPVX_const(sv) : sv_2pv_flags(sv, DPPP_SVPV_NOLEN_LP_ARG, SV_GMAGIC|SV_CONST_RETURN)) #endif #ifndef SvPV_nomg # define SvPV_nomg(sv, lp) SvPV_flags(sv, lp, 0) #endif #ifndef SvPV_nomg_const # define SvPV_nomg_const(sv, lp) SvPV_flags_const(sv, lp, 0) #endif #ifndef SvPV_nomg_const_nolen # define SvPV_nomg_const_nolen(sv) SvPV_flags_const_nolen(sv, 0) #endif #ifndef SvPV_renew # define SvPV_renew(sv,n) STMT_START { SvLEN_set(sv, n); \ SvPV_set((sv), (char *) saferealloc( \ (Malloc_t)SvPVX(sv), (MEM_SIZE)((n)))); \ } STMT_END #endif #ifndef SvMAGIC_set # define SvMAGIC_set(sv, val) \ STMT_START { assert(SvTYPE(sv) >= SVt_PVMG); \ (((XPVMG*) SvANY(sv))->xmg_magic = (val)); } STMT_END #endif #if (PERL_BCDVERSION < 0x5009003) #ifndef SvPVX_const # define SvPVX_const(sv) ((const char*) (0 + SvPVX(sv))) #endif #ifndef SvPVX_mutable # define SvPVX_mutable(sv) (0 + SvPVX(sv)) #endif #ifndef SvRV_set # define SvRV_set(sv, val) \ STMT_START { assert(SvTYPE(sv) >= SVt_RV); \ (((XRV*) SvANY(sv))->xrv_rv = (val)); } STMT_END #endif #else #ifndef SvPVX_const # define SvPVX_const(sv) ((const char*)((sv)->sv_u.svu_pv)) #endif #ifndef SvPVX_mutable # define SvPVX_mutable(sv) ((sv)->sv_u.svu_pv) #endif #ifndef SvRV_set # define SvRV_set(sv, val) \ STMT_START { assert(SvTYPE(sv) >= SVt_RV); \ ((sv)->sv_u.svu_rv = (val)); } STMT_END #endif #endif #ifndef SvSTASH_set # define SvSTASH_set(sv, val) \ STMT_START { assert(SvTYPE(sv) >= SVt_PVMG); \ (((XPVMG*) SvANY(sv))->xmg_stash = (val)); } STMT_END #endif #if (PERL_BCDVERSION < 0x5004000) #ifndef SvUV_set # define SvUV_set(sv, val) \ STMT_START { assert(SvTYPE(sv) == SVt_IV || SvTYPE(sv) >= SVt_PVIV); \ (((XPVIV*) SvANY(sv))->xiv_iv = (IV) (val)); } STMT_END #endif #else #ifndef SvUV_set # define SvUV_set(sv, val) \ STMT_START { assert(SvTYPE(sv) == SVt_IV || SvTYPE(sv) >= SVt_PVIV); \ (((XPVUV*) SvANY(sv))->xuv_uv = (val)); } STMT_END #endif #endif #if (PERL_BCDVERSION >= 0x5004000) && !defined(vnewSVpvf) #if defined(NEED_vnewSVpvf) static SV * DPPP_(my_vnewSVpvf)(pTHX_ const char *pat, va_list *args); static #else extern SV * DPPP_(my_vnewSVpvf)(pTHX_ const char *pat, va_list *args); #endif #ifdef vnewSVpvf # undef vnewSVpvf #endif #define vnewSVpvf(a,b) DPPP_(my_vnewSVpvf)(aTHX_ a,b) #define Perl_vnewSVpvf DPPP_(my_vnewSVpvf) #if defined(NEED_vnewSVpvf) || defined(NEED_vnewSVpvf_GLOBAL) SV * DPPP_(my_vnewSVpvf)(pTHX_ const char *pat, va_list *args) { register SV *sv = newSV(0); sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); return sv; } #endif #endif #if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_vcatpvf) # define sv_vcatpvf(sv, pat, args) sv_vcatpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)) #endif #if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_vsetpvf) # define sv_vsetpvf(sv, pat, args) sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)) #endif #if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_catpvf_mg) #if defined(NEED_sv_catpvf_mg) static void DPPP_(my_sv_catpvf_mg)(pTHX_ SV *sv, const char *pat, ...); static #else extern void DPPP_(my_sv_catpvf_mg)(pTHX_ SV *sv, const char *pat, ...); #endif #define Perl_sv_catpvf_mg DPPP_(my_sv_catpvf_mg) #if defined(NEED_sv_catpvf_mg) || defined(NEED_sv_catpvf_mg_GLOBAL) void DPPP_(my_sv_catpvf_mg)(pTHX_ SV *sv, const char *pat, ...) { va_list args; va_start(args, pat); sv_vcatpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*)); SvSETMAGIC(sv); va_end(args); } #endif #endif #ifdef PERL_IMPLICIT_CONTEXT #if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_catpvf_mg_nocontext) #if defined(NEED_sv_catpvf_mg_nocontext) static void DPPP_(my_sv_catpvf_mg_nocontext)(SV *sv, const char *pat, ...); static #else extern void DPPP_(my_sv_catpvf_mg_nocontext)(SV *sv, const char *pat, ...); #endif #define sv_catpvf_mg_nocontext DPPP_(my_sv_catpvf_mg_nocontext) #define Perl_sv_catpvf_mg_nocontext DPPP_(my_sv_catpvf_mg_nocontext) #if defined(NEED_sv_catpvf_mg_nocontext) || defined(NEED_sv_catpvf_mg_nocontext_GLOBAL) void DPPP_(my_sv_catpvf_mg_nocontext)(SV *sv, const char *pat, ...) { dTHX; va_list args; va_start(args, pat); sv_vcatpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*)); SvSETMAGIC(sv); va_end(args); } #endif #endif #endif /* sv_catpvf_mg depends on sv_catpvf_mg_nocontext */ #ifndef sv_catpvf_mg # ifdef PERL_IMPLICIT_CONTEXT # define sv_catpvf_mg Perl_sv_catpvf_mg_nocontext # else # define sv_catpvf_mg Perl_sv_catpvf_mg # endif #endif #if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_vcatpvf_mg) # define sv_vcatpvf_mg(sv, pat, args) \ STMT_START { \ sv_vcatpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); \ SvSETMAGIC(sv); \ } STMT_END #endif #if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_setpvf_mg) #if defined(NEED_sv_setpvf_mg) static void DPPP_(my_sv_setpvf_mg)(pTHX_ SV *sv, const char *pat, ...); static #else extern void DPPP_(my_sv_setpvf_mg)(pTHX_ SV *sv, const char *pat, ...); #endif #define Perl_sv_setpvf_mg DPPP_(my_sv_setpvf_mg) #if defined(NEED_sv_setpvf_mg) || defined(NEED_sv_setpvf_mg_GLOBAL) void DPPP_(my_sv_setpvf_mg)(pTHX_ SV *sv, const char *pat, ...) { va_list args; va_start(args, pat); sv_vsetpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*)); SvSETMAGIC(sv); va_end(args); } #endif #endif #ifdef PERL_IMPLICIT_CONTEXT #if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_setpvf_mg_nocontext) #if defined(NEED_sv_setpvf_mg_nocontext) static void DPPP_(my_sv_setpvf_mg_nocontext)(SV *sv, const char *pat, ...); static #else extern void DPPP_(my_sv_setpvf_mg_nocontext)(SV *sv, const char *pat, ...); #endif #define sv_setpvf_mg_nocontext DPPP_(my_sv_setpvf_mg_nocontext) #define Perl_sv_setpvf_mg_nocontext DPPP_(my_sv_setpvf_mg_nocontext) #if defined(NEED_sv_setpvf_mg_nocontext) || defined(NEED_sv_setpvf_mg_nocontext_GLOBAL) void DPPP_(my_sv_setpvf_mg_nocontext)(SV *sv, const char *pat, ...) { dTHX; va_list args; va_start(args, pat); sv_vsetpvfn(sv, pat, strlen(pat), &args, Null(SV**), 0, Null(bool*)); SvSETMAGIC(sv); va_end(args); } #endif #endif #endif /* sv_setpvf_mg depends on sv_setpvf_mg_nocontext */ #ifndef sv_setpvf_mg # ifdef PERL_IMPLICIT_CONTEXT # define sv_setpvf_mg Perl_sv_setpvf_mg_nocontext # else # define sv_setpvf_mg Perl_sv_setpvf_mg # endif #endif #if (PERL_BCDVERSION >= 0x5004000) && !defined(sv_vsetpvf_mg) # define sv_vsetpvf_mg(sv, pat, args) \ STMT_START { \ sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); \ SvSETMAGIC(sv); \ } STMT_END #endif #ifndef newSVpvn_share #if defined(NEED_newSVpvn_share) static SV * DPPP_(my_newSVpvn_share)(pTHX_ const char *src, I32 len, U32 hash); static #else extern SV * DPPP_(my_newSVpvn_share)(pTHX_ const char *src, I32 len, U32 hash); #endif #ifdef newSVpvn_share # undef newSVpvn_share #endif #define newSVpvn_share(a,b,c) DPPP_(my_newSVpvn_share)(aTHX_ a,b,c) #define Perl_newSVpvn_share DPPP_(my_newSVpvn_share) #if defined(NEED_newSVpvn_share) || defined(NEED_newSVpvn_share_GLOBAL) SV * DPPP_(my_newSVpvn_share)(pTHX_ const char *src, I32 len, U32 hash) { SV *sv; if (len < 0) len = -len; if (!hash) PERL_HASH(hash, (char*) src, len); sv = newSVpvn((char *) src, len); sv_upgrade(sv, SVt_PVIV); SvIVX(sv) = hash; SvREADONLY_on(sv); SvPOK_on(sv); return sv; } #endif #endif #ifndef SvSHARED_HASH # define SvSHARED_HASH(sv) (0 + SvUVX(sv)) #endif #ifndef HvNAME_get # define HvNAME_get(hv) HvNAME(hv) #endif #ifndef HvNAMELEN_get # define HvNAMELEN_get(hv) (HvNAME_get(hv) ? (I32)strlen(HvNAME_get(hv)) : 0) #endif #ifndef GvSVn # define GvSVn(gv) GvSV(gv) #endif #ifndef isGV_with_GP # define isGV_with_GP(gv) isGV(gv) #endif #ifndef WARN_ALL # define WARN_ALL 0 #endif #ifndef WARN_CLOSURE # define WARN_CLOSURE 1 #endif #ifndef WARN_DEPRECATED # define WARN_DEPRECATED 2 #endif #ifndef WARN_EXITING # define WARN_EXITING 3 #endif #ifndef WARN_GLOB # define WARN_GLOB 4 #endif #ifndef WARN_IO # define WARN_IO 5 #endif #ifndef WARN_CLOSED # define WARN_CLOSED 6 #endif #ifndef WARN_EXEC # define WARN_EXEC 7 #endif #ifndef WARN_LAYER # define WARN_LAYER 8 #endif #ifndef WARN_NEWLINE # define WARN_NEWLINE 9 #endif #ifndef WARN_PIPE # define WARN_PIPE 10 #endif #ifndef WARN_UNOPENED # define WARN_UNOPENED 11 #endif #ifndef WARN_MISC # define WARN_MISC 12 #endif #ifndef WARN_NUMERIC # define WARN_NUMERIC 13 #endif #ifndef WARN_ONCE # define WARN_ONCE 14 #endif #ifndef WARN_OVERFLOW # define WARN_OVERFLOW 15 #endif #ifndef WARN_PACK # define WARN_PACK 16 #endif #ifndef WARN_PORTABLE # define WARN_PORTABLE 17 #endif #ifndef WARN_RECURSION # define WARN_RECURSION 18 #endif #ifndef WARN_REDEFINE # define WARN_REDEFINE 19 #endif #ifndef WARN_REGEXP # define WARN_REGEXP 20 #endif #ifndef WARN_SEVERE # define WARN_SEVERE 21 #endif #ifndef WARN_DEBUGGING # define WARN_DEBUGGING 22 #endif #ifndef WARN_INPLACE # define WARN_INPLACE 23 #endif #ifndef WARN_INTERNAL # define WARN_INTERNAL 24 #endif #ifndef WARN_MALLOC # define WARN_MALLOC 25 #endif #ifndef WARN_SIGNAL # define WARN_SIGNAL 26 #endif #ifndef WARN_SUBSTR # define WARN_SUBSTR 27 #endif #ifndef WARN_SYNTAX # define WARN_SYNTAX 28 #endif #ifndef WARN_AMBIGUOUS # define WARN_AMBIGUOUS 29 #endif #ifndef WARN_BAREWORD # define WARN_BAREWORD 30 #endif #ifndef WARN_DIGIT # define WARN_DIGIT 31 #endif #ifndef WARN_PARENTHESIS # define WARN_PARENTHESIS 32 #endif #ifndef WARN_PRECEDENCE # define WARN_PRECEDENCE 33 #endif #ifndef WARN_PRINTF # define WARN_PRINTF 34 #endif #ifndef WARN_PROTOTYPE # define WARN_PROTOTYPE 35 #endif #ifndef WARN_QW # define WARN_QW 36 #endif #ifndef WARN_RESERVED # define WARN_RESERVED 37 #endif #ifndef WARN_SEMICOLON # define WARN_SEMICOLON 38 #endif #ifndef WARN_TAINT # define WARN_TAINT 39 #endif #ifndef WARN_THREADS # define WARN_THREADS 40 #endif #ifndef WARN_UNINITIALIZED # define WARN_UNINITIALIZED 41 #endif #ifndef WARN_UNPACK # define WARN_UNPACK 42 #endif #ifndef WARN_UNTIE # define WARN_UNTIE 43 #endif #ifndef WARN_UTF8 # define WARN_UTF8 44 #endif #ifndef WARN_VOID # define WARN_VOID 45 #endif #ifndef WARN_ASSERTIONS # define WARN_ASSERTIONS 46 #endif #ifndef packWARN # define packWARN(a) (a) #endif #ifndef ckWARN # ifdef G_WARN_ON # define ckWARN(a) (PL_dowarn & G_WARN_ON) # else # define ckWARN(a) PL_dowarn # endif #endif #if (PERL_BCDVERSION >= 0x5004000) && !defined(warner) #if defined(NEED_warner) static void DPPP_(my_warner)(U32 err, const char *pat, ...); static #else extern void DPPP_(my_warner)(U32 err, const char *pat, ...); #endif #define Perl_warner DPPP_(my_warner) #if defined(NEED_warner) || defined(NEED_warner_GLOBAL) void DPPP_(my_warner)(U32 err, const char *pat, ...) { SV *sv; va_list args; PERL_UNUSED_ARG(err); va_start(args, pat); sv = vnewSVpvf(pat, &args); va_end(args); sv_2mortal(sv); warn("%s", SvPV_nolen(sv)); } #define warner Perl_warner #define Perl_warner_nocontext Perl_warner #endif #endif /* concatenating with "" ensures that only literal strings are accepted as argument * note that STR_WITH_LEN() can't be used as argument to macros or functions that * under some configurations might be macros */ #ifndef STR_WITH_LEN # define STR_WITH_LEN(s) (s ""), (sizeof(s)-1) #endif #ifndef newSVpvs # define newSVpvs(str) newSVpvn(str "", sizeof(str) - 1) #endif #ifndef newSVpvs_flags # define newSVpvs_flags(str, flags) newSVpvn_flags(str "", sizeof(str) - 1, flags) #endif #ifndef sv_catpvs # define sv_catpvs(sv, str) sv_catpvn(sv, str "", sizeof(str) - 1) #endif #ifndef sv_setpvs # define sv_setpvs(sv, str) sv_setpvn(sv, str "", sizeof(str) - 1) #endif #ifndef hv_fetchs # define hv_fetchs(hv, key, lval) hv_fetch(hv, key "", sizeof(key) - 1, lval) #endif #ifndef hv_stores # define hv_stores(hv, key, val) hv_store(hv, key "", sizeof(key) - 1, val, 0) #endif #ifndef gv_fetchpvn_flags # define gv_fetchpvn_flags(name, len, flags, svt) gv_fetchpv(name, flags, svt) #endif #ifndef gv_fetchpvs # define gv_fetchpvs(name, flags, svt) gv_fetchpvn_flags(name "", sizeof(name) - 1, flags, svt) #endif #ifndef gv_stashpvs # define gv_stashpvs(name, flags) gv_stashpvn(name "", sizeof(name) - 1, flags) #endif #ifndef SvGETMAGIC # define SvGETMAGIC(x) STMT_START { if (SvGMAGICAL(x)) mg_get(x); } STMT_END #endif #ifndef PERL_MAGIC_sv # define PERL_MAGIC_sv '\0' #endif #ifndef PERL_MAGIC_overload # define PERL_MAGIC_overload 'A' #endif #ifndef PERL_MAGIC_overload_elem # define PERL_MAGIC_overload_elem 'a' #endif #ifndef PERL_MAGIC_overload_table # define PERL_MAGIC_overload_table 'c' #endif #ifndef PERL_MAGIC_bm # define PERL_MAGIC_bm 'B' #endif #ifndef PERL_MAGIC_regdata # define PERL_MAGIC_regdata 'D' #endif #ifndef PERL_MAGIC_regdatum # define PERL_MAGIC_regdatum 'd' #endif #ifndef PERL_MAGIC_env # define PERL_MAGIC_env 'E' #endif #ifndef PERL_MAGIC_envelem # define PERL_MAGIC_envelem 'e' #endif #ifndef PERL_MAGIC_fm # define PERL_MAGIC_fm 'f' #endif #ifndef PERL_MAGIC_regex_global # define PERL_MAGIC_regex_global 'g' #endif #ifndef PERL_MAGIC_isa # define PERL_MAGIC_isa 'I' #endif #ifndef PERL_MAGIC_isaelem # define PERL_MAGIC_isaelem 'i' #endif #ifndef PERL_MAGIC_nkeys # define PERL_MAGIC_nkeys 'k' #endif #ifndef PERL_MAGIC_dbfile # define PERL_MAGIC_dbfile 'L' #endif #ifndef PERL_MAGIC_dbline # define PERL_MAGIC_dbline 'l' #endif #ifndef PERL_MAGIC_mutex # define PERL_MAGIC_mutex 'm' #endif #ifndef PERL_MAGIC_shared # define PERL_MAGIC_shared 'N' #endif #ifndef PERL_MAGIC_shared_scalar # define PERL_MAGIC_shared_scalar 'n' #endif #ifndef PERL_MAGIC_collxfrm # define PERL_MAGIC_collxfrm 'o' #endif #ifndef PERL_MAGIC_tied # define PERL_MAGIC_tied 'P' #endif #ifndef PERL_MAGIC_tiedelem # define PERL_MAGIC_tiedelem 'p' #endif #ifndef PERL_MAGIC_tiedscalar # define PERL_MAGIC_tiedscalar 'q' #endif #ifndef PERL_MAGIC_qr # define PERL_MAGIC_qr 'r' #endif #ifndef PERL_MAGIC_sig # define PERL_MAGIC_sig 'S' #endif #ifndef PERL_MAGIC_sigelem # define PERL_MAGIC_sigelem 's' #endif #ifndef PERL_MAGIC_taint # define PERL_MAGIC_taint 't' #endif #ifndef PERL_MAGIC_uvar # define PERL_MAGIC_uvar 'U' #endif #ifndef PERL_MAGIC_uvar_elem # define PERL_MAGIC_uvar_elem 'u' #endif #ifndef PERL_MAGIC_vstring # define PERL_MAGIC_vstring 'V' #endif #ifndef PERL_MAGIC_vec # define PERL_MAGIC_vec 'v' #endif #ifndef PERL_MAGIC_utf8 # define PERL_MAGIC_utf8 'w' #endif #ifndef PERL_MAGIC_substr # define PERL_MAGIC_substr 'x' #endif #ifndef PERL_MAGIC_defelem # define PERL_MAGIC_defelem 'y' #endif #ifndef PERL_MAGIC_glob # define PERL_MAGIC_glob '*' #endif #ifndef PERL_MAGIC_arylen # define PERL_MAGIC_arylen '#' #endif #ifndef PERL_MAGIC_pos # define PERL_MAGIC_pos '.' #endif #ifndef PERL_MAGIC_backref # define PERL_MAGIC_backref '<' #endif #ifndef PERL_MAGIC_ext # define PERL_MAGIC_ext '~' #endif /* That's the best we can do... */ #ifndef sv_catpvn_nomg # define sv_catpvn_nomg sv_catpvn #endif #ifndef sv_catsv_nomg # define sv_catsv_nomg sv_catsv #endif #ifndef sv_setsv_nomg # define sv_setsv_nomg sv_setsv #endif #ifndef sv_pvn_nomg # define sv_pvn_nomg sv_pvn #endif #ifndef SvIV_nomg # define SvIV_nomg SvIV #endif #ifndef SvUV_nomg # define SvUV_nomg SvUV #endif #ifndef sv_catpv_mg # define sv_catpv_mg(sv, ptr) \ STMT_START { \ SV *TeMpSv = sv; \ sv_catpv(TeMpSv,ptr); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_catpvn_mg # define sv_catpvn_mg(sv, ptr, len) \ STMT_START { \ SV *TeMpSv = sv; \ sv_catpvn(TeMpSv,ptr,len); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_catsv_mg # define sv_catsv_mg(dsv, ssv) \ STMT_START { \ SV *TeMpSv = dsv; \ sv_catsv(TeMpSv,ssv); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_setiv_mg # define sv_setiv_mg(sv, i) \ STMT_START { \ SV *TeMpSv = sv; \ sv_setiv(TeMpSv,i); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_setnv_mg # define sv_setnv_mg(sv, num) \ STMT_START { \ SV *TeMpSv = sv; \ sv_setnv(TeMpSv,num); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_setpv_mg # define sv_setpv_mg(sv, ptr) \ STMT_START { \ SV *TeMpSv = sv; \ sv_setpv(TeMpSv,ptr); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_setpvn_mg # define sv_setpvn_mg(sv, ptr, len) \ STMT_START { \ SV *TeMpSv = sv; \ sv_setpvn(TeMpSv,ptr,len); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_setsv_mg # define sv_setsv_mg(dsv, ssv) \ STMT_START { \ SV *TeMpSv = dsv; \ sv_setsv(TeMpSv,ssv); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_setuv_mg # define sv_setuv_mg(sv, i) \ STMT_START { \ SV *TeMpSv = sv; \ sv_setuv(TeMpSv,i); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef sv_usepvn_mg # define sv_usepvn_mg(sv, ptr, len) \ STMT_START { \ SV *TeMpSv = sv; \ sv_usepvn(TeMpSv,ptr,len); \ SvSETMAGIC(TeMpSv); \ } STMT_END #endif #ifndef SvVSTRING_mg # define SvVSTRING_mg(sv) (SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_vstring) : NULL) #endif /* Hint: sv_magic_portable * This is a compatibility function that is only available with * Devel::PPPort. It is NOT in the perl core. * Its purpose is to mimic the 5.8.0 behaviour of sv_magic() when * it is being passed a name pointer with namlen == 0. In that * case, perl 5.8.0 and later store the pointer, not a copy of it. * The compatibility can be provided back to perl 5.004. With * earlier versions, the code will not compile. */ #if (PERL_BCDVERSION < 0x5004000) /* code that uses sv_magic_portable will not compile */ #elif (PERL_BCDVERSION < 0x5008000) # define sv_magic_portable(sv, obj, how, name, namlen) \ STMT_START { \ SV *SvMp_sv = (sv); \ char *SvMp_name = (char *) (name); \ I32 SvMp_namlen = (namlen); \ if (SvMp_name && SvMp_namlen == 0) \ { \ MAGIC *mg; \ sv_magic(SvMp_sv, obj, how, 0, 0); \ mg = SvMAGIC(SvMp_sv); \ mg->mg_len = -42; /* XXX: this is the tricky part */ \ mg->mg_ptr = SvMp_name; \ } \ else \ { \ sv_magic(SvMp_sv, obj, how, SvMp_name, SvMp_namlen); \ } \ } STMT_END #else # define sv_magic_portable(a, b, c, d, e) sv_magic(a, b, c, d, e) #endif #ifdef USE_ITHREADS #ifndef CopFILE # define CopFILE(c) ((c)->cop_file) #endif #ifndef CopFILEGV # define CopFILEGV(c) (CopFILE(c) ? gv_fetchfile(CopFILE(c)) : Nullgv) #endif #ifndef CopFILE_set # define CopFILE_set(c,pv) ((c)->cop_file = savepv(pv)) #endif #ifndef CopFILESV # define CopFILESV(c) (CopFILE(c) ? GvSV(gv_fetchfile(CopFILE(c))) : Nullsv) #endif #ifndef CopFILEAV # define CopFILEAV(c) (CopFILE(c) ? GvAV(gv_fetchfile(CopFILE(c))) : Nullav) #endif #ifndef CopSTASHPV # define CopSTASHPV(c) ((c)->cop_stashpv) #endif #ifndef CopSTASHPV_set # define CopSTASHPV_set(c,pv) ((c)->cop_stashpv = ((pv) ? savepv(pv) : Nullch)) #endif #ifndef CopSTASH # define CopSTASH(c) (CopSTASHPV(c) ? gv_stashpv(CopSTASHPV(c),GV_ADD) : Nullhv) #endif #ifndef CopSTASH_set # define CopSTASH_set(c,hv) CopSTASHPV_set(c, (hv) ? HvNAME(hv) : Nullch) #endif #ifndef CopSTASH_eq # define CopSTASH_eq(c,hv) ((hv) && (CopSTASHPV(c) == HvNAME(hv) \ || (CopSTASHPV(c) && HvNAME(hv) \ && strEQ(CopSTASHPV(c), HvNAME(hv))))) #endif #else #ifndef CopFILEGV # define CopFILEGV(c) ((c)->cop_filegv) #endif #ifndef CopFILEGV_set # define CopFILEGV_set(c,gv) ((c)->cop_filegv = (GV*)SvREFCNT_inc(gv)) #endif #ifndef CopFILE_set # define CopFILE_set(c,pv) CopFILEGV_set((c), gv_fetchfile(pv)) #endif #ifndef CopFILESV # define CopFILESV(c) (CopFILEGV(c) ? GvSV(CopFILEGV(c)) : Nullsv) #endif #ifndef CopFILEAV # define CopFILEAV(c) (CopFILEGV(c) ? GvAV(CopFILEGV(c)) : Nullav) #endif #ifndef CopFILE # define CopFILE(c) (CopFILESV(c) ? SvPVX(CopFILESV(c)) : Nullch) #endif #ifndef CopSTASH # define CopSTASH(c) ((c)->cop_stash) #endif #ifndef CopSTASH_set # define CopSTASH_set(c,hv) ((c)->cop_stash = (hv)) #endif #ifndef CopSTASHPV # define CopSTASHPV(c) (CopSTASH(c) ? HvNAME(CopSTASH(c)) : Nullch) #endif #ifndef CopSTASHPV_set # define CopSTASHPV_set(c,pv) CopSTASH_set((c), gv_stashpv(pv,GV_ADD)) #endif #ifndef CopSTASH_eq # define CopSTASH_eq(c,hv) (CopSTASH(c) == (hv)) #endif #endif /* USE_ITHREADS */ #ifndef IN_PERL_COMPILETIME # define IN_PERL_COMPILETIME (PL_curcop == &PL_compiling) #endif #ifndef IN_LOCALE_RUNTIME # define IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE) #endif #ifndef IN_LOCALE_COMPILETIME # define IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE) #endif #ifndef IN_LOCALE # define IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) #endif #ifndef IS_NUMBER_IN_UV # define IS_NUMBER_IN_UV 0x01 #endif #ifndef IS_NUMBER_GREATER_THAN_UV_MAX # define IS_NUMBER_GREATER_THAN_UV_MAX 0x02 #endif #ifndef IS_NUMBER_NOT_INT # define IS_NUMBER_NOT_INT 0x04 #endif #ifndef IS_NUMBER_NEG # define IS_NUMBER_NEG 0x08 #endif #ifndef IS_NUMBER_INFINITY # define IS_NUMBER_INFINITY 0x10 #endif #ifndef IS_NUMBER_NAN # define IS_NUMBER_NAN 0x20 #endif #ifndef GROK_NUMERIC_RADIX # define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) #endif #ifndef PERL_SCAN_GREATER_THAN_UV_MAX # define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 #endif #ifndef PERL_SCAN_SILENT_ILLDIGIT # define PERL_SCAN_SILENT_ILLDIGIT 0x04 #endif #ifndef PERL_SCAN_ALLOW_UNDERSCORES # define PERL_SCAN_ALLOW_UNDERSCORES 0x01 #endif #ifndef PERL_SCAN_DISALLOW_PREFIX # define PERL_SCAN_DISALLOW_PREFIX 0x02 #endif #ifndef grok_numeric_radix #if defined(NEED_grok_numeric_radix) static bool DPPP_(my_grok_numeric_radix)(pTHX_ const char ** sp, const char * send); static #else extern bool DPPP_(my_grok_numeric_radix)(pTHX_ const char ** sp, const char * send); #endif #ifdef grok_numeric_radix # undef grok_numeric_radix #endif #define grok_numeric_radix(a,b) DPPP_(my_grok_numeric_radix)(aTHX_ a,b) #define Perl_grok_numeric_radix DPPP_(my_grok_numeric_radix) #if defined(NEED_grok_numeric_radix) || defined(NEED_grok_numeric_radix_GLOBAL) bool DPPP_(my_grok_numeric_radix)(pTHX_ const char **sp, const char *send) { #ifdef USE_LOCALE_NUMERIC #ifdef PL_numeric_radix_sv if (PL_numeric_radix_sv && IN_LOCALE) { STRLEN len; char* radix = SvPV(PL_numeric_radix_sv, len); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } } #else /* older perls don't have PL_numeric_radix_sv so the radix * must manually be requested from locale.h */ #include dTHR; /* needed for older threaded perls */ struct lconv *lc = localeconv(); char *radix = lc->decimal_point; if (radix && IN_LOCALE) { STRLEN len = strlen(radix); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } } #endif #endif /* USE_LOCALE_NUMERIC */ /* always try "." if numeric radix didn't match because * we may have data from different locales mixed */ if (*sp < send && **sp == '.') { ++*sp; return TRUE; } return FALSE; } #endif #endif #ifndef grok_number #if defined(NEED_grok_number) static int DPPP_(my_grok_number)(pTHX_ const char * pv, STRLEN len, UV * valuep); static #else extern int DPPP_(my_grok_number)(pTHX_ const char * pv, STRLEN len, UV * valuep); #endif #ifdef grok_number # undef grok_number #endif #define grok_number(a,b,c) DPPP_(my_grok_number)(aTHX_ a,b,c) #define Perl_grok_number DPPP_(my_grok_number) #if defined(NEED_grok_number) || defined(NEED_grok_number_GLOBAL) int DPPP_(my_grok_number)(pTHX_ const char *pv, STRLEN len, UV *valuep) { const char *s = pv; const char *send = pv + len; const UV max_div_10 = UV_MAX / 10; const char max_mod_10 = UV_MAX % 10; int numtype = 0; int sawinf = 0; int sawnan = 0; while (s < send && isSPACE(*s)) s++; if (s == send) { return 0; } else if (*s == '-') { s++; numtype = IS_NUMBER_NEG; } else if (*s == '+') s++; if (s == send) return 0; /* next must be digit or the radix separator or beginning of infinity */ if (isDIGIT(*s)) { /* UVs are at least 32 bits, so the first 9 decimal digits cannot overflow. */ UV value = *s - '0'; /* This construction seems to be more optimiser friendly. (without it gcc does the isDIGIT test and the *s - '0' separately) With it gcc on arm is managing 6 instructions (6 cycles) per digit. In theory the optimiser could deduce how far to unroll the loop before checking for overflow. */ if (++s < send) { int digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { /* Now got 9 digits, so need to check each time for overflow. */ digit = *s - '0'; while (digit >= 0 && digit <= 9 && (value < max_div_10 || (value == max_div_10 && digit <= max_mod_10))) { value = value * 10 + digit; if (++s < send) digit = *s - '0'; else break; } if (digit >= 0 && digit <= 9 && (s < send)) { /* value overflowed. skip the remaining digits, don't worry about setting *valuep. */ do { s++; } while (s < send && isDIGIT(*s)); numtype |= IS_NUMBER_GREATER_THAN_UV_MAX; goto skip_value; } } } } } } } } } } } } } } } } } } numtype |= IS_NUMBER_IN_UV; if (valuep) *valuep = value; skip_value: if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT; while (s < send && isDIGIT(*s)) /* optional digits after the radix */ s++; } } else if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ /* no digits before the radix means we need digits after it */ if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); if (valuep) { /* integer approximation is valid - it's 0. */ *valuep = 0; } } else return 0; } else if (*s == 'I' || *s == 'i') { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; s++; if (s < send && (*s == 'I' || *s == 'i')) { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; s++; if (s == send || (*s != 'T' && *s != 't')) return 0; s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; s++; } sawinf = 1; } else if (*s == 'N' || *s == 'n') { /* XXX TODO: There are signaling NaNs and quiet NaNs. */ s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; sawnan = 1; } else return 0; if (sawinf) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; } else if (sawnan) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; } else if (s < send) { /* we can have an optional exponent part */ if (*s == 'e' || *s == 'E') { /* The only flag we keep is sign. Blow away any "it's UV" */ numtype &= IS_NUMBER_NEG; numtype |= IS_NUMBER_NOT_INT; s++; if (s < send && (*s == '-' || *s == '+')) s++; if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); } else return 0; } } while (s < send && isSPACE(*s)) s++; if (s >= send) return numtype; if (len == 10 && memEQ(pv, "0 but true", 10)) { if (valuep) *valuep = 0; return IS_NUMBER_IN_UV; } return 0; } #endif #endif /* * The grok_* routines have been modified to use warn() instead of * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit, * which is why the stack variable has been renamed to 'xdigit'. */ #ifndef grok_bin #if defined(NEED_grok_bin) static UV DPPP_(my_grok_bin)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); static #else extern UV DPPP_(my_grok_bin)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); #endif #ifdef grok_bin # undef grok_bin #endif #define grok_bin(a,b,c,d) DPPP_(my_grok_bin)(aTHX_ a,b,c,d) #define Perl_grok_bin DPPP_(my_grok_bin) #if defined(NEED_grok_bin) || defined(NEED_grok_bin_GLOBAL) UV DPPP_(my_grok_bin)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) { const char *s = start; STRLEN len = *len_p; UV value = 0; NV value_nv = 0; const UV max_div_2 = UV_MAX / 2; bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; bool overflowed = FALSE; if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { /* strip off leading b or 0b. for compatibility silently suffer "b" and "0b" as valid binary numbers. */ if (len >= 1) { if (s[0] == 'b') { s++; len--; } else if (len >= 2 && s[0] == '0' && s[1] == 'b') { s+=2; len-=2; } } } for (; len-- && *s; s++) { char bit = *s; if (bit == '0' || bit == '1') { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. With gcc seems to be much straighter code than old scan_bin. */ redo: if (!overflowed) { if (value <= max_div_2) { value = (value << 1) | (bit - '0'); continue; } /* Bah. We're just overflowed. */ warn("Integer overflow in binary number"); overflowed = TRUE; value_nv = (NV) value; } value_nv *= 2.0; /* If an NV has not enough bits in its mantissa to * represent a UV this summing of small low-order numbers * is a waste of time (because the NV cannot preserve * the low-order bits anyway): we could just remember when * did we overflow and in the end just multiply value_nv by the * right amount. */ value_nv += (NV)(bit - '0'); continue; } if (bit == '_' && len && allow_underscores && (bit = s[1]) && (bit == '0' || bit == '1')) { --len; ++s; goto redo; } if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) warn("Illegal binary digit '%c' ignored", *s); break; } if ( ( overflowed && value_nv > 4294967295.0) #if UVSIZE > 4 || (!overflowed && value > 0xffffffff ) #endif ) { warn("Binary number > 0b11111111111111111111111111111111 non-portable"); } *len_p = s - start; if (!overflowed) { *flags = 0; return value; } *flags = PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = value_nv; return UV_MAX; } #endif #endif #ifndef grok_hex #if defined(NEED_grok_hex) static UV DPPP_(my_grok_hex)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); static #else extern UV DPPP_(my_grok_hex)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); #endif #ifdef grok_hex # undef grok_hex #endif #define grok_hex(a,b,c,d) DPPP_(my_grok_hex)(aTHX_ a,b,c,d) #define Perl_grok_hex DPPP_(my_grok_hex) #if defined(NEED_grok_hex) || defined(NEED_grok_hex_GLOBAL) UV DPPP_(my_grok_hex)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) { const char *s = start; STRLEN len = *len_p; UV value = 0; NV value_nv = 0; const UV max_div_16 = UV_MAX / 16; bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; bool overflowed = FALSE; const char *xdigit; if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { /* strip off leading x or 0x. for compatibility silently suffer "x" and "0x" as valid hex numbers. */ if (len >= 1) { if (s[0] == 'x') { s++; len--; } else if (len >= 2 && s[0] == '0' && s[1] == 'x') { s+=2; len-=2; } } } for (; len-- && *s; s++) { xdigit = strchr((char *) PL_hexdigit, *s); if (xdigit) { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. With gcc seems to be much straighter code than old scan_hex. */ redo: if (!overflowed) { if (value <= max_div_16) { value = (value << 4) | ((xdigit - PL_hexdigit) & 15); continue; } warn("Integer overflow in hexadecimal number"); overflowed = TRUE; value_nv = (NV) value; } value_nv *= 16.0; /* If an NV has not enough bits in its mantissa to * represent a UV this summing of small low-order numbers * is a waste of time (because the NV cannot preserve * the low-order bits anyway): we could just remember when * did we overflow and in the end just multiply value_nv by the * right amount of 16-tuples. */ value_nv += (NV)((xdigit - PL_hexdigit) & 15); continue; } if (*s == '_' && len && allow_underscores && s[1] && (xdigit = strchr((char *) PL_hexdigit, s[1]))) { --len; ++s; goto redo; } if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) warn("Illegal hexadecimal digit '%c' ignored", *s); break; } if ( ( overflowed && value_nv > 4294967295.0) #if UVSIZE > 4 || (!overflowed && value > 0xffffffff ) #endif ) { warn("Hexadecimal number > 0xffffffff non-portable"); } *len_p = s - start; if (!overflowed) { *flags = 0; return value; } *flags = PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = value_nv; return UV_MAX; } #endif #endif #ifndef grok_oct #if defined(NEED_grok_oct) static UV DPPP_(my_grok_oct)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); static #else extern UV DPPP_(my_grok_oct)(pTHX_ const char * start, STRLEN * len_p, I32 * flags, NV * result); #endif #ifdef grok_oct # undef grok_oct #endif #define grok_oct(a,b,c,d) DPPP_(my_grok_oct)(aTHX_ a,b,c,d) #define Perl_grok_oct DPPP_(my_grok_oct) #if defined(NEED_grok_oct) || defined(NEED_grok_oct_GLOBAL) UV DPPP_(my_grok_oct)(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) { const char *s = start; STRLEN len = *len_p; UV value = 0; NV value_nv = 0; const UV max_div_8 = UV_MAX / 8; bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; bool overflowed = FALSE; for (; len-- && *s; s++) { /* gcc 2.95 optimiser not smart enough to figure that this subtraction out front allows slicker code. */ int digit = *s - '0'; if (digit >= 0 && digit <= 7) { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. */ redo: if (!overflowed) { if (value <= max_div_8) { value = (value << 3) | digit; continue; } /* Bah. We're just overflowed. */ warn("Integer overflow in octal number"); overflowed = TRUE; value_nv = (NV) value; } value_nv *= 8.0; /* If an NV has not enough bits in its mantissa to * represent a UV this summing of small low-order numbers * is a waste of time (because the NV cannot preserve * the low-order bits anyway): we could just remember when * did we overflow and in the end just multiply value_nv by the * right amount of 8-tuples. */ value_nv += (NV)digit; continue; } if (digit == ('_' - '0') && len && allow_underscores && (digit = s[1] - '0') && (digit >= 0 && digit <= 7)) { --len; ++s; goto redo; } /* Allow \octal to work the DWIM way (that is, stop scanning * as soon as non-octal characters are seen, complain only iff * someone seems to want to use the digits eight and nine). */ if (digit == 8 || digit == 9) { if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) warn("Illegal octal digit '%c' ignored", *s); } break; } if ( ( overflowed && value_nv > 4294967295.0) #if UVSIZE > 4 || (!overflowed && value > 0xffffffff ) #endif ) { warn("Octal number > 037777777777 non-portable"); } *len_p = s - start; if (!overflowed) { *flags = 0; return value; } *flags = PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = value_nv; return UV_MAX; } #endif #endif #if !defined(my_snprintf) #if defined(NEED_my_snprintf) static int DPPP_(my_my_snprintf)(char * buffer, const Size_t len, const char * format, ...); static #else extern int DPPP_(my_my_snprintf)(char * buffer, const Size_t len, const char * format, ...); #endif #define my_snprintf DPPP_(my_my_snprintf) #define Perl_my_snprintf DPPP_(my_my_snprintf) #if defined(NEED_my_snprintf) || defined(NEED_my_snprintf_GLOBAL) int DPPP_(my_my_snprintf)(char *buffer, const Size_t len, const char *format, ...) { dTHX; int retval; va_list ap; va_start(ap, format); #ifdef HAS_VSNPRINTF retval = vsnprintf(buffer, len, format, ap); #else retval = vsprintf(buffer, format, ap); #endif va_end(ap); if (retval < 0 || (len > 0 && (Size_t)retval >= len)) Perl_croak(aTHX_ "panic: my_snprintf buffer overflow"); return retval; } #endif #endif #if !defined(my_sprintf) #if defined(NEED_my_sprintf) static int DPPP_(my_my_sprintf)(char * buffer, const char * pat, ...); static #else extern int DPPP_(my_my_sprintf)(char * buffer, const char * pat, ...); #endif #define my_sprintf DPPP_(my_my_sprintf) #define Perl_my_sprintf DPPP_(my_my_sprintf) #if defined(NEED_my_sprintf) || defined(NEED_my_sprintf_GLOBAL) int DPPP_(my_my_sprintf)(char *buffer, const char* pat, ...) { va_list args; va_start(args, pat); vsprintf(buffer, pat, args); va_end(args); return strlen(buffer); } #endif #endif #ifdef NO_XSLOCKS # ifdef dJMPENV # define dXCPT dJMPENV; int rEtV = 0 # define XCPT_TRY_START JMPENV_PUSH(rEtV); if (rEtV == 0) # define XCPT_TRY_END JMPENV_POP; # define XCPT_CATCH if (rEtV != 0) # define XCPT_RETHROW JMPENV_JUMP(rEtV) # else # define dXCPT Sigjmp_buf oldTOP; int rEtV = 0 # define XCPT_TRY_START Copy(top_env, oldTOP, 1, Sigjmp_buf); rEtV = Sigsetjmp(top_env, 1); if (rEtV == 0) # define XCPT_TRY_END Copy(oldTOP, top_env, 1, Sigjmp_buf); # define XCPT_CATCH if (rEtV != 0) # define XCPT_RETHROW Siglongjmp(top_env, rEtV) # endif #endif #if !defined(my_strlcat) #if defined(NEED_my_strlcat) static Size_t DPPP_(my_my_strlcat)(char * dst, const char * src, Size_t size); static #else extern Size_t DPPP_(my_my_strlcat)(char * dst, const char * src, Size_t size); #endif #define my_strlcat DPPP_(my_my_strlcat) #define Perl_my_strlcat DPPP_(my_my_strlcat) #if defined(NEED_my_strlcat) || defined(NEED_my_strlcat_GLOBAL) Size_t DPPP_(my_my_strlcat)(char *dst, const char *src, Size_t size) { Size_t used, length, copy; used = strlen(dst); length = strlen(src); if (size > 0 && used < size - 1) { copy = (length >= size - used) ? size - used - 1 : length; memcpy(dst + used, src, copy); dst[used + copy] = '\0'; } return used + length; } #endif #endif #if !defined(my_strlcpy) #if defined(NEED_my_strlcpy) static Size_t DPPP_(my_my_strlcpy)(char * dst, const char * src, Size_t size); static #else extern Size_t DPPP_(my_my_strlcpy)(char * dst, const char * src, Size_t size); #endif #define my_strlcpy DPPP_(my_my_strlcpy) #define Perl_my_strlcpy DPPP_(my_my_strlcpy) #if defined(NEED_my_strlcpy) || defined(NEED_my_strlcpy_GLOBAL) Size_t DPPP_(my_my_strlcpy)(char *dst, const char *src, Size_t size) { Size_t length, copy; length = strlen(src); if (size > 0) { copy = (length >= size) ? size - 1 : length; memcpy(dst, src, copy); dst[copy] = '\0'; } return length; } #endif #endif #ifndef PERL_PV_ESCAPE_QUOTE # define PERL_PV_ESCAPE_QUOTE 0x0001 #endif #ifndef PERL_PV_PRETTY_QUOTE # define PERL_PV_PRETTY_QUOTE PERL_PV_ESCAPE_QUOTE #endif #ifndef PERL_PV_PRETTY_ELLIPSES # define PERL_PV_PRETTY_ELLIPSES 0x0002 #endif #ifndef PERL_PV_PRETTY_LTGT # define PERL_PV_PRETTY_LTGT 0x0004 #endif #ifndef PERL_PV_ESCAPE_FIRSTCHAR # define PERL_PV_ESCAPE_FIRSTCHAR 0x0008 #endif #ifndef PERL_PV_ESCAPE_UNI # define PERL_PV_ESCAPE_UNI 0x0100 #endif #ifndef PERL_PV_ESCAPE_UNI_DETECT # define PERL_PV_ESCAPE_UNI_DETECT 0x0200 #endif #ifndef PERL_PV_ESCAPE_ALL # define PERL_PV_ESCAPE_ALL 0x1000 #endif #ifndef PERL_PV_ESCAPE_NOBACKSLASH # define PERL_PV_ESCAPE_NOBACKSLASH 0x2000 #endif #ifndef PERL_PV_ESCAPE_NOCLEAR # define PERL_PV_ESCAPE_NOCLEAR 0x4000 #endif #ifndef PERL_PV_ESCAPE_RE # define PERL_PV_ESCAPE_RE 0x8000 #endif #ifndef PERL_PV_PRETTY_NOCLEAR # define PERL_PV_PRETTY_NOCLEAR PERL_PV_ESCAPE_NOCLEAR #endif #ifndef PERL_PV_PRETTY_DUMP # define PERL_PV_PRETTY_DUMP PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE #endif #ifndef PERL_PV_PRETTY_REGPROP # define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_LTGT|PERL_PV_ESCAPE_RE #endif /* Hint: pv_escape * Note that unicode functionality is only backported to * those perl versions that support it. For older perl * versions, the implementation will fall back to bytes. */ #ifndef pv_escape #if defined(NEED_pv_escape) static char * DPPP_(my_pv_escape)(pTHX_ SV * dsv, char const * const str, const STRLEN count, const STRLEN max, STRLEN * const escaped, const U32 flags); static #else extern char * DPPP_(my_pv_escape)(pTHX_ SV * dsv, char const * const str, const STRLEN count, const STRLEN max, STRLEN * const escaped, const U32 flags); #endif #ifdef pv_escape # undef pv_escape #endif #define pv_escape(a,b,c,d,e,f) DPPP_(my_pv_escape)(aTHX_ a,b,c,d,e,f) #define Perl_pv_escape DPPP_(my_pv_escape) #if defined(NEED_pv_escape) || defined(NEED_pv_escape_GLOBAL) char * DPPP_(my_pv_escape)(pTHX_ SV *dsv, char const * const str, const STRLEN count, const STRLEN max, STRLEN * const escaped, const U32 flags) { const char esc = flags & PERL_PV_ESCAPE_RE ? '%' : '\\'; const char dq = flags & PERL_PV_ESCAPE_QUOTE ? '"' : esc; char octbuf[32] = "%123456789ABCDF"; STRLEN wrote = 0; STRLEN chsize = 0; STRLEN readsize = 1; #if defined(is_utf8_string) && defined(utf8_to_uvchr) bool isuni = flags & PERL_PV_ESCAPE_UNI ? 1 : 0; #endif const char *pv = str; const char * const end = pv + count; octbuf[0] = esc; if (!(flags & PERL_PV_ESCAPE_NOCLEAR)) sv_setpvs(dsv, ""); #if defined(is_utf8_string) && defined(utf8_to_uvchr) if ((flags & PERL_PV_ESCAPE_UNI_DETECT) && is_utf8_string((U8*)pv, count)) isuni = 1; #endif for (; pv < end && (!max || wrote < max) ; pv += readsize) { const UV u = #if defined(is_utf8_string) && defined(utf8_to_uvchr) isuni ? utf8_to_uvchr((U8*)pv, &readsize) : #endif (U8)*pv; const U8 c = (U8)u & 0xFF; if (u > 255 || (flags & PERL_PV_ESCAPE_ALL)) { if (flags & PERL_PV_ESCAPE_FIRSTCHAR) chsize = my_snprintf(octbuf, sizeof octbuf, "%"UVxf, u); else chsize = my_snprintf(octbuf, sizeof octbuf, "%cx{%"UVxf"}", esc, u); } else if (flags & PERL_PV_ESCAPE_NOBACKSLASH) { chsize = 1; } else { if (c == dq || c == esc || !isPRINT(c)) { chsize = 2; switch (c) { case '\\' : /* fallthrough */ case '%' : if (c == esc) octbuf[1] = esc; else chsize = 1; break; case '\v' : octbuf[1] = 'v'; break; case '\t' : octbuf[1] = 't'; break; case '\r' : octbuf[1] = 'r'; break; case '\n' : octbuf[1] = 'n'; break; case '\f' : octbuf[1] = 'f'; break; case '"' : if (dq == '"') octbuf[1] = '"'; else chsize = 1; break; default: chsize = my_snprintf(octbuf, sizeof octbuf, pv < end && isDIGIT((U8)*(pv+readsize)) ? "%c%03o" : "%c%o", esc, c); } } else { chsize = 1; } } if (max && wrote + chsize > max) { break; } else if (chsize > 1) { sv_catpvn(dsv, octbuf, chsize); wrote += chsize; } else { char tmp[2]; my_snprintf(tmp, sizeof tmp, "%c", c); sv_catpvn(dsv, tmp, 1); wrote++; } if (flags & PERL_PV_ESCAPE_FIRSTCHAR) break; } if (escaped != NULL) *escaped= pv - str; return SvPVX(dsv); } #endif #endif #ifndef pv_pretty #if defined(NEED_pv_pretty) static char * DPPP_(my_pv_pretty)(pTHX_ SV * dsv, char const * const str, const STRLEN count, const STRLEN max, char const * const start_color, char const * const end_color, const U32 flags); static #else extern char * DPPP_(my_pv_pretty)(pTHX_ SV * dsv, char const * const str, const STRLEN count, const STRLEN max, char const * const start_color, char const * const end_color, const U32 flags); #endif #ifdef pv_pretty # undef pv_pretty #endif #define pv_pretty(a,b,c,d,e,f,g) DPPP_(my_pv_pretty)(aTHX_ a,b,c,d,e,f,g) #define Perl_pv_pretty DPPP_(my_pv_pretty) #if defined(NEED_pv_pretty) || defined(NEED_pv_pretty_GLOBAL) char * DPPP_(my_pv_pretty)(pTHX_ SV *dsv, char const * const str, const STRLEN count, const STRLEN max, char const * const start_color, char const * const end_color, const U32 flags) { const U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '%'; STRLEN escaped; if (!(flags & PERL_PV_PRETTY_NOCLEAR)) sv_setpvs(dsv, ""); if (dq == '"') sv_catpvs(dsv, "\""); else if (flags & PERL_PV_PRETTY_LTGT) sv_catpvs(dsv, "<"); if (start_color != NULL) sv_catpv(dsv, D_PPP_CONSTPV_ARG(start_color)); pv_escape(dsv, str, count, max, &escaped, flags | PERL_PV_ESCAPE_NOCLEAR); if (end_color != NULL) sv_catpv(dsv, D_PPP_CONSTPV_ARG(end_color)); if (dq == '"') sv_catpvs(dsv, "\""); else if (flags & PERL_PV_PRETTY_LTGT) sv_catpvs(dsv, ">"); if ((flags & PERL_PV_PRETTY_ELLIPSES) && escaped < count) sv_catpvs(dsv, "..."); return SvPVX(dsv); } #endif #endif #ifndef pv_display #if defined(NEED_pv_display) static char * DPPP_(my_pv_display)(pTHX_ SV * dsv, const char * pv, STRLEN cur, STRLEN len, STRLEN pvlim); static #else extern char * DPPP_(my_pv_display)(pTHX_ SV * dsv, const char * pv, STRLEN cur, STRLEN len, STRLEN pvlim); #endif #ifdef pv_display # undef pv_display #endif #define pv_display(a,b,c,d,e) DPPP_(my_pv_display)(aTHX_ a,b,c,d,e) #define Perl_pv_display DPPP_(my_pv_display) #if defined(NEED_pv_display) || defined(NEED_pv_display_GLOBAL) char * DPPP_(my_pv_display)(pTHX_ SV *dsv, const char *pv, STRLEN cur, STRLEN len, STRLEN pvlim) { pv_pretty(dsv, pv, cur, pvlim, NULL, NULL, PERL_PV_PRETTY_DUMP); if (len > cur && pv[cur] == '\0') sv_catpvs(dsv, "\\0"); return SvPVX(dsv); } #endif #endif #endif /* _P_P_PORTABILITY_H_ */ /* End of File ppport.h */ Unicode-LineBreak-Unicode-LineBreak-2016.007_02/sombok/000077500000000000000000000000001273566223400222145ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/000077500000000000000000000000001273566223400211655ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/000.t000066400000000000000000000004121273566223400216460ustar00rootroot00000000000000# -*- perl -*- # -*- coding: utf-8 -*- use strict; use Test::More; use Unicode::LineBreak qw(:all); BEGIN { plan tests => 1 } diag sprintf "sombok %s with Unicode %s\n", Unicode::LineBreak::SOMBOK_VERSION, Unicode::LineBreak::UNICODE_VERSION; ok(1); Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/00GraphemeBreakTest.t000066400000000000000000000043041273566223400250500ustar00rootroot00000000000000# -*- perl -*- # -*- coding: utf-8 -*- # # 00GraphemeBreakTest.t - Test suite provided by Unicode Consortium. # # - Passed by GraphemeBreakTest-6.1.0.txt (2011-12-07, 17:54:39 UTC), except # 50 surrogate cases. # - Passed by GraphemeBreakTest-6.2.0d4.txt (2012-06-02, 23:25:40 UTC), except # 58 surrogate cases. [sombok-2.3.0beta1] # - Passed by GraphemeBreakTest-6.2.0d6.txt (2012-08-14, 17:54:56 UTC), except # 54 surrogate cases. [sombok-2.3.0gamma1] # - Passed by GraphemeBreakTest-6.2.0d8.txt (2012-08-22, 12:41:15 UTC), except # 54 surrogate cases. [sombok-2.3.0] # - Passed by GraphemeBreakTest-6.3.0d1.txt (2012-12-20, 22:18:29 UTC), except # 54 surrogate cases. [sombok-2.3.1b] # - Passed by GraphemeBreakTest-7.0.0d13.txt (2013-11-27, 09:54:39 UTC), except # surrogate cases. [sombok-2.3.2beta1] # - Passed by GraphemeBreakTest-8.0.0.txt (2015-02-13, 13:47:15 UTC), except # surrogate cases. [sombok-2.4.0] # # Note: Legacy-CM feature is enabled. # use strict; use Test::More; use Encode qw(decode is_utf8); use Unicode::GCString; BEGIN { my $tests = 0; if (open IN, 'test-data/GraphemeBreakTest.txt') { my $desc = ''; while () { s/\s*#\s*(.*)//; if ($. <= 2) { $desc .= " $1"; chomp $desc; } next unless /\S/; $tests++; } close IN; if ($tests) { plan tests => $tests; diag $desc; } else { plan skip_all => 'test-data/GraphemeBreakTest.txt is empty.'; } } else { plan skip_all => 'test-data/GraphemeBreakTest.txt found at '. 'http://www.unicode.org/Public/ is required.'; } } my @opts = (LegacyCM => 'YES', ViramaAsJoiner => 'NO'); open IN, 'test-data/GraphemeBreakTest.txt'; while () { chomp $_; s/\s*#\s*(.*)$//; my $desc = $1; next unless /\S/; SKIP: { skip "subtests including surrogate", 1 if /\bD[89AB][0-9A-F][0-9A-F]\b/; s/\s*÷$//; s/^÷\s*//; my $s = join '', map { $_ = chr hex "0x$_"; $_ = decode('iso-8859-1', $_) unless is_utf8($_); $_; } split /\s*(?:÷|×)\s*/, $_; is join(' ÷ ', map { join ' × ', map { sprintf '%04X', ord $_ } split //, $_->as_string; } @{Unicode::GCString->new($s, @opts)} ), $_, $desc; } } close IN; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/00LineBreakTest.t000066400000000000000000000045461273566223400242170ustar00rootroot00000000000000# -*- perl -*- # -*- coding: utf-8 -*- # # 00LineBreakTest.t - Test suite provided by Unicode Consortium. # # - Passed by LineBreakTest-6.0.0.txt (2010-08-30, 21:08:43 UTC). # - Passed by LineBreakTest-6.1.0d12.txt (2011-09-16, 22:24:58 UTC). # - Passed by LineBreakTest-6.1.0d19.txt (2011-12-07, 01:05:50 UTC). # - 29 subtests failed by LineBreakTest-6.2.0d4.txt (2012-06-02, 23:25:41 UTC). # [sombok-2.3.0beta1] # - Passed by LineBreakTest-6.2.0d6.txt (2012-08-14, 17:54:58 UTC). # [sombok-2.3.0gamma1] # - Passed by LineBreakTest-6.2.0d8.txt (2012-08-22, 12:41:17 UTC). # [sombok-2.3.0] # - Passed by LineBreakTest-6.3.0d1.txt (2012-12-20, 22:18:30 UTC). # [sombok-2.3.1b] # - Passed by LineBreakTest-7.0.0d30.txt (2014-02-19, 15:51:25 UTC). # [sombok-2.3.2beta1] # - Passed by LineBreakTest-8.0.0.txt (2015-04-30, 09:40:15 UTC). # [sombok-2.4.0] # # Note: Legacy-CM feature is disabled. # use strict; use Test::More; use Encode qw(decode is_utf8); use Unicode::LineBreak qw(:all); BEGIN { my $tests = 0; if (open IN, 'test-data/LineBreakTest.txt') { my $desc = ''; while () { s/\s*#\s*(.*)//; if ($. <= 2) { $desc .= " $1"; chomp $desc; } next unless /\S/; $tests++; } close IN; if ($tests) { plan tests => $tests; diag $desc; } else { plan skip_all => 'test-data/LineBreakTest.txt is empty.'; } } else { plan skip_all => 'test-data/LineBreakTest.txt found at '. 'http://www.unicode.org/Public/ is required.'; } } my $lb = Unicode::LineBreak->new( BreakIndent => 'NO', ColMax => 1, EAWidth => [[1..65532] => EA_N], Format => undef, LegacyCM => 'NO', ); open IN, 'test-data/LineBreakTest.txt'; while () { chomp $_; s/\s*#\s*(.*)$//; my $desc = $1; next unless /\S/; s/\s*÷$//; s/^×\s*//; my $s = join '', map { $_ = chr hex "0x$_"; $_ = decode('iso-8859-1', $_) unless is_utf8($_); $_; } split /\s*(?:÷|×)\s*/, $_; my $got = join(' ÷ ', map { join ' × ', map { sprintf '%04X', ord $_ } split //, $_; } $lb->break($s) ); SKIP: { #XXX # Tentative check #XXX my $t = $got; #XXX if ($t =~ s/ × 200D\b/ ÷ 200D/ and $t eq $_) { #XXX diag "Skipped: $desc"; #XXX skip "subtests including debatable ZJ behavior", 1; #XXX } is $got, $_, $desc; } } close IN; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/01break.t000066400000000000000000000003011273566223400225710ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 13 } foreach my $lang (qw(ar el fr he ja ja-a ko ko-decomp ru sa vi vi-decomp zh)) { dotest($lang, $lang); } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/02hangul.t000066400000000000000000000002401273566223400227660ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 2 } dotest('ko', 'ko.al', HangulAsAL => 'YES'); dotest('amitagyong', 'amitagyong'); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/03ns.t000066400000000000000000000006041273566223400221350ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 2 } dotest('ja-k', 'ja-k', ColumnsMax => 72); dotest('ja-k', 'ja-k.ns', LBClass => [KANA_NONSTARTERS() => LB_ID()], ColumnsMax => 72); ## obsoleted option. #dotest('ja-k', 'ja-k.ns', LBClass => [[0x3041..0x30A0] => LB_NS()], # TailorLB => [KANA_NONSTARTERS() => LB_ID()], # ColumnsMax => 72); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/04fold.t000066400000000000000000000016011273566223400224400ustar00rootroot00000000000000use strict; use Test::More; require "t/lf.pl"; BEGIN { plan tests => 15 + 2 } foreach my $lang (qw(fr ja quotes)) { do5tests($lang, $lang); } my $in = <new(ColMax => 72); is($lf->fold($in, 'FLOWED'), $out, 'CPAN RT 115146'); is($lf->unfold($out, 'FLOWED'), $in, 'reversal'); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/05urgent.t000066400000000000000000000007571273566223400230340ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 5 } dotest('ecclesiazusae', 'ecclesiazusae'); dotest('ecclesiazusae', 'ecclesiazusae.ColumnsMax', Urgent => 'FORCE'); dotest('ecclesiazusae', 'ecclesiazusae.CharactersMax', CharMax => 79); dotest('ecclesiazusae', 'ecclesiazusae.ColumnsMin', ColMin => 7, ColMax => 66, Urgent => 'FORCE'); eval { dotest('ecclesiazusae', 'ecclesiazusae', Urgent => 'CROAK'); }; ok($@ =~ /^Excessive line was found/, 'CROAK'); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/06context.t000066400000000000000000000006361273566223400232110ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 2 } dotest('fr', 'fr.ea', Context => 'EASTASIAN'); dotest('fr', 'fr', Context => 'EASTASIAN', EAWidth => [AMBIGUOUS_ALPHABETICS() => EA_N()]); ## obsoleted option. #dotest('fr', 'fr', Context => 'EASTASIAN', # EAWidth => [[0x0041..0x005A, 0x0061..0x007A] => EA_A()], # TailorEA => [AMBIGUOUS_ALPHABETICS() => EA_N()]); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/07sea.t000066400000000000000000000010361273566223400222710ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { require Unicode::LineBreak; my $sea = Unicode::LineBreak::SouthEastAsian::supported(); if ($sea) { diag "SA word segmentation supported. $sea"; $sea =~ m{libthai/(\d+)\.(\d+)\.(\d+)}; if (0.001009 <= $1 + $2 * 0.001 + $3 * 0.000001) { plan tests => 1; } else { plan skip_all => "Your libthai is too old (cf. CPAN RT #61922)."; } } else { plan skip_all => "SA word segmentation not supported."; } } dotest('th', 'th', ComplexBreaking => "YES"); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/08partial.t000066400000000000000000000007571273566223400231670ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 15 } foreach my $len (qw(2 76 998)) { foreach my $lang (qw(ja-a amitagyong ecclesiazusae ko-decomp)) { dotest_partial($lang, $lang, $len); } my $sea = Unicode::LineBreak::SouthEastAsian::supported(); if ($sea) { $sea =~ m{libthai/(\d+)\.(\d+)\.(\d+)}; if (0.001009 <= $1 + $2 * 0.001 + $3 * 0.000001) { dotest_partial('th', 'th', $len); next; } } dotest_partial('th', 'th.al', $len); } Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/09uri.t000066400000000000000000000006141273566223400223230ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 2 } dotest('uri', 'uri.break', ColumnsMax => 1, Prep => 'BREAKURI'); dotest('uri', 'uri.nonbreak', ColumnsMax => 1, Prep => 'NONBREAKURI'); ## Obsoleted options #dotest('uri', 'uri.break', ColumnsMax => 1, UserBreaking => ['BREAKURI']); #dotest('uri', 'uri.nonbreak', ColumnsMax => 1, UserBreaking => ['NONBREAKURI']); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/10gcstring.t000066400000000000000000000037751273566223400233470ustar00rootroot00000000000000use Test::More; use Unicode::GCString; BEGIN { plan tests => 37 } ($s, $r) = (pack('U*', 0x300, 0, 0x0D, 0x41, 0x300, 0x301, 0x3042, 0xD, 0xA, 0xAC00, 0x11A8), pack('U*', 0xAC00, 0x11A8, 0xD, 0xA, 0x3042, 0x41, 0x300, 0x301, 0xD, 0, 0x300)); $string = Unicode::GCString->new($s); is($string->length, 7); is($string->columns, 5); is($string->chars, 11); is($r, Unicode::GCString->new(join '', reverse map {$_->[0]} @{$string})->as_string); $string = Unicode::GCString->new( pack('U*', 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF)); is($string->length, 2); is($string->columns, 4); is($string->chars, 6); is($string, $string->copy); $s1 = pack('U*', 0x1112, 0x1161); $s2 = pack('U*', 0x11AB, 0x1100, 0x1173, 0x11AF); $g1 = Unicode::GCString->new($s1); $g2 = Unicode::GCString->new($s2); is($g1.$g2, $string); is(($g1.$g2)->length, 2); is(($g1.$g2)->columns, 4); is($string->chars, 6); is($g1.$s2, $string); is(($g1.$s2)->length, 2); is(($g1.$s2)->columns, 4); is($string->chars, 6); is($s1.$g2, $string); is(($s1.$g2)->length, 2); is(($s1.$g2)->columns, 4); is($string->chars, 6); $s1 .= $g2; is($s1, $string); $g1 .= $s2; is($g1, $string); is($string->substr(1), pack('U*', 0x1100, 0x1173, 0x11AF)); is($string->substr(-1), pack('U*', 0x1100, 0x1173, 0x11AF)); is($string->substr(0, -1), pack('U*', 0x1112, 0x1161, 0x11AB)); $string->substr(-1, 1, "A"); is($string, pack('U*', 0x1112, 0x1161, 0x11AB, 0x41)); $string->substr(2, 0, "B"); is($string, pack('U*', 0x1112, 0x1161, 0x11AB, 0x41, 0x42)); $string->substr(0, 0, "C"); is($string, pack('U*', 0x43, 0x1112, 0x1161, 0x11AB, 0x41, 0x42)); @s = (pack('U*', 0x300), pack('U*', 0), pack('U*', 0x0D), pack('U*', 0x41, 0x300, 0x301), pack('U*', 0x3042), pack('U*', 0xD, 0xA), pack('U*', 0xAC00, 0x11A8)); $string = Unicode::GCString->new(join '', @s); while ($gc = <$string>) { is($gc, shift @s); } my $number = Unicode::GCString->new(5); is($number->columns, 1, 'number "5"'); $number = Unicode::GCString->new(0); is($number->columns, 1, 'number "0"'); Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/11format.t000066400000000000000000000006721273566223400230110ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 6 } foreach my $lang (qw(fr ja)) { dotest($lang, "$lang.format", Format => sub { return " $_[1]>$_[2]" if $_[1] =~ /^so/; return "<$_[1]\n" if $_[1] =~ /^eo/; undef }); } foreach my $lang (qw(fr ko)) { dotest($lang, "$lang.newline", Format => "NEWLINE"); } foreach my $lang (qw(fr ko)) { dotest($lang, "$lang.newline", Format => "TRIM"); } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/12fold2.t000066400000000000000000000002231273566223400225200ustar00rootroot00000000000000use strict; use Test::More; require "t/lf.pl"; BEGIN { plan tests => 2 } foreach my $lang (qw(fr ja)) { dowraptest($lang, $lang); } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/13flowedsp.t000066400000000000000000000002371273566223400233430ustar00rootroot00000000000000use strict; use Test::More; require "t/lf.pl"; BEGIN { plan tests => 1 } foreach my $lang (qw(flowedsp)) { dounfoldtest($lang, $lang, 'FLOWEDSP'); } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/14sea_al.t000066400000000000000000000002001273566223400227330ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 1 } dotest('th', 'th.al', ComplexBreaking => "NO"); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/15array.t000066400000000000000000000020251273566223400226350ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; BEGIN { plan tests => 19 } # break foreach my $lang (qw(ar el fr ja ja-a ko ru zh)) { dotest_array($lang, $lang); } # urgent dotest_array('ecclesiazusae', 'ecclesiazusae'); dotest_array('ecclesiazusae', 'ecclesiazusae.ColumnsMax', Urgent => 'FORCE'); dotest_array('ecclesiazusae', 'ecclesiazusae.CharactersMax', CharMax => 79); dotest_array('ecclesiazusae', 'ecclesiazusae.ColumnsMin', ColMin => 7, ColMax => 66, Urgent => 'FORCE'); eval { dotest_array('ecclesiazusae', 'ecclesiazusae', Urgent => 'CROAK'); }; ok($@ =~ /^Excessive line was found/, 'CROAK'); # format foreach my $lang (qw(fr ja)) { dotest_array($lang, "$lang.format", Format => sub { return " $_[1]>$_[2]" if $_[1] =~ /^so/; return "<$_[1]\n" if $_[1] =~ /^eo/; undef }); } foreach my $lang (qw(fr ko)) { dotest_array($lang, "$lang.newline", Format => "NEWLINE"); } foreach my $lang (qw(fr ko)) { dotest_array($lang, "$lang.newline", Format => "TRIM"); } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/16regex.t000066400000000000000000000033471273566223400226420ustar00rootroot00000000000000use strict; use Test::More; require "t/lb.pl"; my $splitre; BEGIN { $splitre = eval q{ qr{ (?<=^url:) | (?<=[/]) (?=[^/]) | (?<=[^-.]) (?=[-~.,_?\#%=&]) | (?<=[=&]) (?=.) }iox }; if ($@) { diag $@; plan skip_all => "Perl may have a bug (cf. perlbug #82302)."; } else { plan tests => 6; } } # Regex matching most of URL-like strings. my $URIre = qr{ \b (?:url:)? (?:[a-z][-0-9a-z+.]+://|news:|mailto:) [\x21-\x7E]+ }iox; # Breaking URIs according to some CMoS rules. sub breakURI { # 17.11 1.1: [/] ÷ [^/] # 17.11 2: [-] × # 6.17 2: [.] × # 17.11 1.2: ÷ [-~.,_?#%] # 17.11 1.3: ÷ [=&] # 17.11 1.3: [=&] ÷ # Default: ALL × ALL my @c = split m{$splitre}, $_[1]; # Won't break punctuations at end of matches. while (2 <= scalar @c and $c[$#c] =~ /^[\".:;,>]+$/) { my $c = pop @c; $c[$#c] .= $c; } @c; } # [REGEX, SUB] pair dotest('uri', 'uri.break', ColumnsMax => 1, Prep => [$URIre, \&breakURI]); dotest('uri', 'uri.nonbreak', ColumnsMax => 1, Prep => [$URIre, sub { ($_[1]) }]); # [STRING, SUB] pair dotest('uri', 'uri.nonbreak', ColumnsMax => 1, Prep => ["$URIre", sub { ($_[1]) }]); # multiple patterns dotest('uri', 'uri.break', ColumnsMax => 1, Prep => [$URIre, \&breakURI], Prep => [qr{ftp://[\x21-\x7e]+}, sub { ($_[1]) } ]); dotest('uri', 'uri.break.http', ColumnsMax => 1, Prep => [qr{ftp://[\x21-\x7e]+}, sub { ($_[1]) } ], Prep => [$URIre, \&breakURI]); dotest('uri', 'uri.nonbreak', ColumnsMax => 1, Prep => [qr{ftp://[\x21-\x7e]+}, sub { ($_[1]) } ], Prep => [qr{http://[\x21-\x7e]+}, sub { ($_[1]) } ], Prep => [$URIre, \&breakURI]); 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/17prop.t000066400000000000000000000026251273566223400225070ustar00rootroot00000000000000use strict; use Test::More; require 't/lb.pl'; BEGIN { plan tests => 12 } my @opts = (Context => 'EASTASIAN'); is(Unicode::GCString->new(Encode::decode('iso-8859-1', "\xA0"), @opts)->lbc, Unicode::LineBreak::LB_GL()); is(Unicode::GCString->new(Encode::decode('iso-8859-1', "\xC2\xA0"), @opts)->lbc, Unicode::LineBreak::LB_AL()); is(Unicode::GCString->new(Encode::decode('iso-8859-1', "\xD7"), @opts)->columns, 2); is(Unicode::GCString->new(Encode::decode('iso-8859-1', "\xC3"), @opts)->columns, 1); ### obsoleted functions ##my $lb = Unicode::LineBreak->new(@opts); ## ##foreach my $s (("\xA0", "\x{A0}", Encode::decode('iso-8859-1', "\xA0"), ## )) { ## is($lb->lbclass($s), Unicode::LineBreak::LB_GL()); ##} ##is($lb->lbclass("\xC2\xA0"), Unicode::LineBreak::LB_AL()); ##foreach my $s (("\xD7", "\x{D7}", Encode::decode('iso-8859-1', "\xD7"), ## )) { ## is($lb->eawidth($s), Unicode::LineBreak::EA_F()); ##} ##is($lb->eawidth("\xC3\x97"), Unicode::LineBreak::EA_N()); my $lb = Unicode::LineBreak->new(@opts); foreach my $s ("\xA0", "\x{A0}", Encode::decode('iso-8859-1', "\xA0")) { is(Unicode::GCString->new($s)->lbc, Unicode::LineBreak::LB_GL()); } is(Unicode::GCString->new("\xC2\xA0")->lbc, Unicode::LineBreak::LB_AL()); foreach my $s ("\xD7", "\x{D7}", Encode::decode('iso-8859-1', "\xD7")) { is(Unicode::GCString->new($s)->columns, 1); } is(Unicode::GCString->new("\xC2\xA0")->columns, 2); Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/18currency.t000066400000000000000000000017061273566223400233610ustar00rootroot00000000000000use strict; use Test::More; require 't/lb.pl'; BEGIN { plan tests => 96 } my @opts = (Context => 'EASTASIAN'); foreach my $c (0x20A0..0x20CF) { my $gc = Unicode::GCString->new(sprintf('%c', $c), @opts); if ($c == 0x20A9) { is($gc->columns, 1, 'U+20A9 WON SIGN eaw:H'); } elsif ($c == 0x20AC) { is($gc->columns, 2, 'U+20AC EURO SIGN eaw:A'); } else { is($gc->columns, 1, sprintf 'U+%04X eaw:N', $c); } if ($c == 0x20A7) { is($gc->lbc, Unicode::LineBreak::LB_PO(), 'U+20A7 PESETA SIGN lbc:PO'); } elsif ($c == 0x20B6) { is($gc->lbc, Unicode::LineBreak::LB_PO(), 'U+20B6 LIVRE TOURNOIS SIGN lbc:PO'); } elsif ($c == 0x20BB) { is($gc->lbc, Unicode::LineBreak::LB_PO(), 'U+20BB NORDIC MARK SIGN lbc:PO'); } elsif ($c == 0x20BE) { is($gc->lbc, Unicode::LineBreak::LB_PO(), 'U+20BE LARI SIGN lbc:PO'); } else { is($gc->lbc, Unicode::LineBreak::LB_PR(), sprintf 'U+%04X lbc:PR', $c); } } Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/lb.pl000066400000000000000000000044121273566223400221200ustar00rootroot00000000000000use strict; use Encode qw(decode_utf8 encode_utf8); use Unicode::LineBreak qw(:all); @Unicode::LineBreak::Config = ( CharMax => 998, ColMax => 76, ColMin => 0, Context => 'NONEASTASIAN', EAWidth => [[0x302E, 0x302F] => EA_Z()], # 6.1.0: Changed from Mn to Mc. Format => 'SIMPLE', HangulAsAL => 'NO', LBClass => undef, LegacyCM => "YES", Newline => "\n", Prep => undef, Sizing => "UAX11", Urgent => undef, ); sub dotest { my $in = shift; my $out = shift; open IN, "); close IN; my $lb = Unicode::LineBreak->new(@_); my $broken = encode_utf8($lb->break($instring)); my $outstring = ''; if (open OUT, "; close OUT; } else { open XXX, ">test-data/$out.xxx"; print XXX $broken; close XXX; } is($broken, $outstring, "$in --> $out"); } sub dotest_partial { my $in = shift; my $out = shift; my $len = shift; my $lb = Unicode::LineBreak->new(@_); open IN, "); close IN; my $broken = ''; while ($instring) { my $p = substr($instring, 0, $len); if (length $instring < $len) { $instring = ''; } else { $instring = substr($instring, $len); } $broken .= encode_utf8($lb->break_partial($p)); } $broken .= encode_utf8($lb->break_partial(undef)); my $outstring = ''; if (open OUT, "; close OUT; } else { open XXX, ">test-data/$out.xxx"; print XXX $broken; close XXX; } is($broken, $outstring, "$in --> $out, length $len"); } sub dotest_array { my $in = shift; my $out = shift; open IN, "); close IN; my $lb = Unicode::LineBreak->new(@_); my @broken = map { encode_utf8("$_") } $lb->break($instring); my @outstring = (); if (open OUT, "; close OUT; } else { open XXX, ">test-data/$out.xxx"; print XXX join '', @broken; close XXX; } is_deeply(\@broken, \@outstring, "$in --> $out"); } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/lf.pl000066400000000000000000000041331273566223400221240ustar00rootroot00000000000000use strict; use Text::LineFold; sub dounfoldtest { my $in = shift; my $out = shift; my $method = shift; open IN, "; close IN; my $lf = Text::LineFold->new(@_); my $unfolded = $lf->unfold($instring, $method); my $outstring = ''; if (open OUT, "; close OUT; } else { open XXX, ">test-data/$out.xxx"; print XXX $unfolded; close XXX; } is($unfolded, $outstring, "unfold $in, method=$method"); } sub do5tests { my $in = shift; my $out = shift; open IN, "; close IN; my $lf = Text::LineFold->new(@_); my %folded = (); foreach my $method (qw(PLAIN FIXED FLOWED)) { $folded{$method} = $lf->fold($instring, $method); my $outstring = ''; if (open OUT, "; close OUT; } else { open XXX, ">test-data/$out.".(lc $method).".xxx"; print XXX $folded{$method}; close XXX; } is($folded{$method}, $outstring, "fold $in, method=$method"); } foreach my $method (qw(FIXED FLOWED)) { my $outstring = $lf->unfold($folded{$method}, $method); if (open IN, "; close IN; } is($outstring, $instring, "unfold $out, method=$method"); #XXXopen XXX, ">test-data/$out.".(lc $method).".xxx"; #XXXprint XXX $outstring; #XXXclose XXX; } } sub dowraptest { my $in = shift; my $out = shift; open IN, "; close IN; my $lf = Text::LineFold->new(@_); my $folded = $lf->fold("\t", ' ' x 4, $instring); my $outstring = ''; if (open OUT, "; close OUT; } else { open XXX, ">test-data/$out.wrap.xxx"; print XXX $folded; close XXX; } is($folded, $outstring, "wrap $in"); } 1; Unicode-LineBreak-Unicode-LineBreak-2016.007_02/t/pod.t000066400000000000000000000002271273566223400221350ustar00rootroot00000000000000use strict; use Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 or later required for testing POD" if $@; all_pod_files_ok(); Unicode-LineBreak-Unicode-LineBreak-2016.007_02/test-data/000077500000000000000000000000001273566223400226105ustar00rootroot00000000000000Unicode-LineBreak-Unicode-LineBreak-2016.007_02/typemap000066400000000000000000000054521273566223400223320ustar00rootroot00000000000000# Typemap for Unicode-LineBreak # ============================= # # Copyright (C) 2009-2013 by Hatuka*nezumi - IKEDA Soji . # # This package is free software; you can redistribute it and/or modify it # under the same terms as Perl itself. # # Notes # ----- # - A type generic_string has INPUT map from generic string (utf8-flagged # string or Unicode::GCString object) to gcstring_t *. When mapped from # utf8-flagged string, result is ``mortal''. # - A type unistr_t * has INPUT map from Perl string (string must be # UTF8-flagged or must contain 7-bit chars only). Result is ``mortal''. # - generic_string has OUTPUT map to Unicode::GCString. For unistr_t * # OUTPUT map has not been implemented yet. TYPEMAP unichar_t T_UV propval_t T_U_CHAR swapspec_t T_SWAPSPEC linebreak_t * T_UNICODE_LINEBREAK generic_string T_UNICODE_GCSTRING gcstring_t * T_UNICODE_GCSTRING unistr_t * T_UNICODE_GCSTRING INPUT T_SWAPSPEC if (SvOK($arg)) $var = (IV)SvIV($arg); else $var = -1 T_UNICODE_LINEBREAK if (! sv_isobject($arg)) croak(\"$func_name: Not object\"); else if (sv_derived_from($arg, \"Unicode::LineBreak\")) $var = PerltoC(linebreak_t *, $arg); else croak(\"$func_name: Unknown object \%s\", HvNAME(SvSTASH(SvRV($arg)))) T_UNICODE_GCSTRING if (! SvOK($arg)) $var = NULL; else${ my $mycode = ($type eq q) ? qq< if (! sv_isobject($arg)) { unistr_t unistr = { NULL, 0 }; /* Generic string must be well-formed. */ SVtounistr(&unistr, $arg); if (($var = gcstring_new(&unistr, lbobj)) == NULL) croak(\"$func_name: %s\", strerror(errno)); /* let Unicode buffer be mortal. */ sv_2mortal(CtoPerl(\"Unicode::GCString\", $var)); \#undef lbobj } else> : q<>; \$mycode }${ my $mycode = ($type =~ /^unistr_t\s*\*$/) ? qq< if (! sv_isobject($arg)) { gcstring_t *gcstr; /* container of buffer. */ if ((gcstr = malloc(sizeof(gcstring_t))) == NULL) croak(\"$func_name: %s\", strerror(errno)); memset(gcstr, 0, sizeof(gcstring_t)); /* String not being decoded must be treated as Unicode. */ if (! SvUTF8($arg)) SVupgradetounistr((unistr_t *)gcstr, $arg); else SVtounistr((unistr_t *)gcstr, $arg); $var = (unistr_t *)gcstr; /* let Unicode buffer be mortal. */ sv_2mortal(CtoPerl(\"Unicode::GCString\", gcstr)); } else> : q<>; \$mycode} if (sv_derived_from($arg, \"Unicode::GCString\")) $var = ($type)PerltoC(gcstring_t *, $arg); else croak(\"$func_name: Unknown object \%s\", HvNAME(SvSTASH(SvRV($arg)))) OUTPUT T_UNICODE_LINEBREAK setCtoPerl($arg, \"Unicode::LineBreak\", $var); T_UNICODE_GCSTRING ${ my $mycode = ($type =~ /^unistr_t\s*\*$/) ? qq<\#error OUTPUT typemap has not been implemented yet.> : qq; \$mycode }