pax_global_header00006660000000000000000000000064126102626710014515gustar00rootroot0000000000000052 comment=5bf25fbfde4a2ecfeb9214d1ee05297b5cad9395 ocaml-csv-1.4.2/000077500000000000000000000000001261026267100134055ustar00rootroot00000000000000ocaml-csv-1.4.2/.gitignore000066400000000000000000000004651261026267100154020ustar00rootroot00000000000000# -*-conf-unix-*- *.cmi *.cmo *.cma *.cmx *.cmxa *.cmxs *.annot .depend.ocaml *.com *.exe *.native META examples/csvtool csv.godiva doc # Generated by OASIS or ocamlbuild: API.odocl AUTHORS.txt README.txt _build/ myocamlbuild.ml setup.data setup.log setup.ml src/csv.mllib API.docdir # Tarball *.tar.gz ocaml-csv-1.4.2/.travis-ci.sh000066400000000000000000000004501261026267100157170ustar00rootroot00000000000000 OPAM_PKGS="oasis base-bytes" export OPAMYES=1 if [ -f "$HOME/.opam/config" ]; then opam update opam upgrade else opam init fi if [ -n "${OPAM_SWITCH}" ]; then opam switch ${OPAM_SWITCH} fi eval `opam config env` opam install $OPAM_PKGS export OCAMLRUNPARAM=b make make test ocaml-csv-1.4.2/.travis.yml000066400000000000000000000004501261026267100155150ustar00rootroot00000000000000language: c sudo: false addons: apt: sources: - avsm packages: - ocaml - ocaml-native-compilers - opam script: bash -ex .travis-ci.sh env: - OPAM_SWITCH=system - OPAM_SWITCH=3.12.1 - OPAM_SWITCH=4.01.0 - OPAM_SWITCH=4.02.2 cache: directories: - $HOME/.opam ocaml-csv-1.4.2/INSTALL.txt000066400000000000000000000016611261026267100152600ustar00rootroot00000000000000(* OASIS_START *) (* DO NOT EDIT (digest: aeed0b6f112359d79bc311b45f9c68e2) *) This is the INSTALL file for the csv distribution. This package uses OASIS to generate its build system. See section OASIS for full information. Dependencies ============ In order to compile this package, you will need: * ocaml for all, doc API * findlib Installing ========== 1. Uncompress the source archive and go to the root of the package 2. Run 'ocaml setup.ml -configure' 3. 
Run 'ocaml setup.ml -build' 4. Run 'ocaml setup.ml -install' Uninstalling ============ 1. Go to the root of the package 2. Run 'ocaml setup.ml -uninstall' OASIS ===== OASIS is a program that generates a setup.ml file using a simple '_oasis' configuration file. The generated setup only depends on the standard OCaml installation: no additional library is required. (* OASIS_STOP *) Testing ======= If you would like to run some tests, just issue 'ocaml setup.ml -test'. ocaml-csv-1.4.2/LICENSE.txt000066400000000000000000000653451261026267100152450ustar00rootroot00000000000000This Library is distributed under the terms of the GNU Lesser General Public License version 2.1 (included below). As a special exception to the GNU Lesser General Public License, you may link, statically or dynamically, a "work that uses the Library" with a publicly distributed version of the Library to produce an executable file containing portions of the Library, and distribute that executable file under terms of your choice, without any of the additional requirements listed in clause 6 of the GNU Lesser General Public License. By "a publicly distributed version of the Library", we mean either the unmodified Library as distributed, or a modified version of the Library that is distributed under the conditions defined in clause 3 of the GNU Library General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU Lesser General Public License. GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] 
Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. 
We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. 
These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". 
A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. 
A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. 
As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. 
c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. 
b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. 
The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! ocaml-csv-1.4.2/Makefile000066400000000000000000000027431261026267100150530ustar00rootroot00000000000000PKGNAME = $(shell oasis query name) PKGVERSION = $(shell oasis query version) PKG_TARBALL = $(PKGNAME)-$(PKGVERSION).tar.gz DISTFILES = LICENSE.txt INSTALL.txt README.md _oasis \ _tags Makefile setup.ml \ $(filter-out %~, $(wildcard src/*) $(wildcard examples/*) $(wildcard tests/*)) WEB = shell.forge.ocamlcore.org:/home/groups/csv/htdocs .PHONY: all byte native configure doc test install uninstall reinstall \ upload-doc all byte native: configure ocaml setup.ml -build configure: setup.ml ocaml $< -configure --enable-tests setup.ml: _oasis oasis setup -setup-update dynamic test doc install uninstall reinstall: all ocaml setup.ml -$@ upload-doc: doc scp -C -p -r _build/API.docdir $(WEB) csvtool: all ./csvtool.native pastecol 1-3 2,1,2 \ tests/testcsv9.csv tests/testcsv9.csv # "Force" a tag to be defined for each released tarball dist tar: setup.ml @ if [ -z "$(PKGNAME)" ]; then echo "PKGNAME not defined"; exit 1; fi @ if [ -z "$(PKGVERSION)" ]; then \ echo "PKGVERSION not defined"; exit 1; fi mkdir $(PKGNAME)-$(PKGVERSION) cp -r --parents $(DISTFILES) $(PKGNAME)-$(PKGVERSION)/ # Generate a setup.ml independent of oasis cd $(PKGNAME)-$(PKGVERSION) && oasis setup tar -zcvf $(PKG_TARBALL) 
$(PKGNAME)-$(PKGVERSION) $(RM) -rf $(PKGNAME)-$(PKGVERSION) web: doc $(MAKE) -C doc $@ .PHONY: clean distclean clean:: ocaml setup.ml -clean $(RM) $(PKG_TARBALL) setup.data distclean: ocaml setup.ml -distclean $(RM) $(wildcard *.ba[0-9] *.bak *~ *.odocl) ocaml-csv-1.4.2/README.md000066400000000000000000000021001261026267100146550ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/Chris00/ocaml-csv.svg?branch=master)](https://travis-ci.org/Chris00/ocaml-csv) OCaml CSV ========= The [comma-separated values](http://en.wikipedia.org/wiki/Comma-separated_values) format — or CSV for short — is a simple tabular format supported by all major spreadsheets. This library implements pure OCaml functions to read and write files in this format (including Excel extensions) as well as some convenience functions to manipulate such data. Compile & install ----------------- The easiest way to install this library is to use [OPAM](http://opam.ocaml.org/): opam install csv If you prefer to compile by hand, run ocaml setup.ml -configure ocaml setup.ml -build ocaml setup.ml -install Note that This package uses [oasis](https://github.com/ocaml/oasis) to generate its configure, build and install scripts. However, you only need `oasis` if you want to compile the development version. Uninstall --------- With OPAM: opam remove csv Manually (from the source directory): ocaml setup.ml -uninstall ocaml-csv-1.4.2/_oasis000066400000000000000000000046131261026267100146110ustar00rootroot00000000000000# -*-conf-*- OASISFormat: 0.4 Name: csv Version: 1.4.2 Synopsis: A pure OCaml library to read and write CSV files. Description: This is a pure OCaml library to read and write CSV files, including all extensions used by Excel — e.g. quotes, newlines, 8 bit characters in fields, \"0 etc. A special representation of rows of CSV files with a header is provided. The library comes with a handy command line tool called csvtool for handling CSV files from shell scripts. 
Authors: Richard Jones, Christophe Troestler Maintainers: Christophe Troestler Categories: http://database, http://science License: LGPL-2.1 with OCaml linking exception Homepage: https://github.com/Chris00/ocaml-csv Plugins: META (0.4), StdFiles (0.4) Library csv Path: src BuildTools: ocamlbuild Modules: Csv BuildDepends: bytes Document API Title: API reference for Csv Type: OCamlbuild (0.4) InstallDir: $docdir/api BuildTools: ocamldoc, ocamlbuild XOCamlbuildPath: . XOCamlbuildLibraries: csv # Examples Executable csvtool Path: examples/ MainIs: csvtool.ml BuildDepends: csv CompiledObject: best BuildTools: ocamlbuild Install: true Executable example Build$: flag(tests) Path: examples/ MainIs: example.ml BuildDepends: csv CompiledObject: best BuildTools: ocamlbuild Install: false # Testing Executable test Build$: flag(tests) Path: tests/ MainIs: test.ml BuildDepends: csv CompiledObject: best BuildTools: ocamlbuild Install: false Test conformity WorkingDirectory: tests/ Command: $test Run: true Executable test_header Path: tests/ MainIs: test_header.ml BuildDepends: csv CompiledObject: best BuildTools: ocamlbuild Install: false Test header Command: $test_header Run: true Executable test_write Build$: flag(tests) Path: tests/ MainIs: test_write.ml BuildDepends: csv CompiledObject: best BuildTools: ocamlbuild Install: false Test write WorkingDirectory: tests/ Command: $test_write Run: true SourceRepository master Type: git Location: https://github.com/Chris00/ocaml-csv.git Browser: https://github.com/Chris00/ocaml-csv ocaml-csv-1.4.2/_tags000066400000000000000000000001371261026267100144260ustar00rootroot00000000000000# OASIS_START # OASIS_STOP true: annot, safe_string true: warn(A-4@8-29-33-40-41-42-44-45-48) ocaml-csv-1.4.2/examples/000077500000000000000000000000001261026267100152235ustar00rootroot00000000000000ocaml-csv-1.4.2/examples/csvtool.ml000066400000000000000000000717471261026267100172660ustar00rootroot00000000000000(* Handy tool for managing CSV files. 
@author Richard Jones *) open Printf (*------------------------------ start of code from extlib *) exception Invalid_string let find str sub = let sublen = String.length sub in if sublen = 0 then 0 else let found = ref 0 in let len = String.length str in try for i = 0 to len - sublen do let j = ref 0 in while String.unsafe_get str (i + !j) = String.unsafe_get sub !j do incr j; if !j = sublen then begin found := i; raise Exit; end; done; done; raise Invalid_string with Exit -> !found let split str sep = let p = find str sep in let len = String.length sep in let slen = String.length str in String.sub str 0 p, String.sub str (p + len) (slen - p - len) let nsplit str sep = if str = "" then [] else ( let rec nsplit str sep = try let s1 , s2 = split str sep in s1 :: nsplit s2 sep with Invalid_string -> [str] in nsplit str sep ) type 'a mut_list = { hd: 'a; mutable tl: 'a list } external inj : 'a mut_list -> 'a list = "%identity" let dummy_node () = { hd = Obj.magic (); tl = [] } let rec drop n = function | _ :: l when n > 0 -> drop (n-1) l | l -> l let take n l = let rec loop n dst = function | h :: t when n > 0 -> let r = { hd = h; tl = [] } in dst.tl <- inj r; loop (n-1) r t | _ -> () in let dummy = dummy_node() in loop n dummy l; dummy.tl (*------------------------------ end of extlib code *) (* Parse column specs. *) type colspec = range list and range = | Col of int (* 0 *) | Range of int * int (* 2-5 *) | ToEnd of int (* 7- *) let parse_colspec ~count_zero colspec = let cols = nsplit colspec "," in let cols = List.map ( fun col -> try (try let first, second = split col "-" in if second <> "" then Range (int_of_string first, int_of_string second) else ToEnd (int_of_string first) with Invalid_string -> Col (int_of_string col) ) with Failure "int_of_string" -> failwith (colspec ^ ":" ^ col ^ ": invalid column-spec") ) cols in (* Adjust so columns always count from zero. 
*) if not count_zero then List.map ( function | Col c -> Col (c-1) | Range (s, e) -> Range (s-1, e-1) | ToEnd e -> ToEnd (e-1) ) cols else cols let rec width_of_colspec = function | [] -> 0 | Col _ :: rest -> 1 + width_of_colspec rest | Range (s, e) :: rest -> (e-s+1) + width_of_colspec rest | ToEnd _ :: _ -> failwith "width_of_colspec: cannot calculate width of an open column spec (one which contains 'N-')" (* For closed column specs, this preserves the correct width in the * result. *) let cols_of_colspec colspec row = let rec loop = function | [] -> [] | Col c :: rest -> (try List.nth row c with Failure "nth" -> "") :: loop rest | Range (s, e) :: rest -> let width = e-s+1 in let range = take width (drop s row) in let range = List.hd (Csv.set_columns ~cols:width [range]) in List.append range (loop rest) | ToEnd e :: rest -> List.append (drop e row) (loop rest) in loop colspec (* The actual commands. *) let cmd_cols ~input_sep ~output_sep ~chan colspec files = List.iter ( fun filename -> let csv = Csv.load ~separator:input_sep filename in let csv = List.map (cols_of_colspec colspec) csv in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv ) files let cmd_namedcols ~input_sep ~output_sep ~chan names files = List.iter ( fun filename -> let csv = Csv.load ~separator:input_sep filename in let header, data = match csv with | [] -> failwith "no rows in this CSV file" | h :: t -> h, t in (* Do the headers requested exist in the CSV file? If not, * throw an error. 
*) List.iter ( fun name -> if not (List.mem name header) then failwith ("namedcol: requested header not in CSV file: " ^ name) ) names; let data = Csv.associate header data in let data = List.map ( fun row -> List.map (fun name -> List.assoc name row) names ) data in let data = names :: data in Csv.output_all (Csv.to_channel ~separator:output_sep chan) data ) files let cmd_width ~input_sep ~chan files = let width = List.fold_left ( fun width filename -> let csv = Csv.load ~separator:input_sep filename in let width = max width (Csv.columns csv) in width ) 0 files in fprintf chan "%d\n" width let cmd_height ~input_sep ~chan files = let height = List.fold_left ( fun height filename -> let csv = Csv.load ~separator:input_sep filename in let height = height + Csv.lines csv in height ) 0 files in fprintf chan "%d\n" height let cmd_readable ~input_sep ~chan files = let csv = List.concat (List.map (Csv.load ~separator:input_sep) files) in Csv.save_out_readable chan csv let iter_csv_rows ~input_sep ~f files = List.iter (fun filename -> let in_chan, close = match filename with | "-" -> stdin, false | filename -> open_in filename, true in try Csv.iter ~f (Csv.of_channel ~separator:input_sep in_chan); if close then close_in in_chan with Exit -> if close then close_in in_chan ) files let cmd_cat ~input_sep ~output_sep ~chan files = (* Avoid loading the whole file into memory. *) let chan = Csv.to_channel ~separator:output_sep chan in let f row = Csv.output_record chan row in iter_csv_rows ~input_sep ~f files let cmd_paste ~input_sep ~output_sep ~chan files = (* Return the 1st row, concatenation of all 1st rows; whether all CSV files are empty; and the CSV files without their 1st row. 
*) let rec add_columns = function | [] -> ([], true, []) (* empty CSV file list *) | [] :: csvs -> (* exhausted the first CSV file *) let row, empty, csvs = add_columns csvs in (row, empty, [] :: csvs) | (r :: csv0) :: csvs -> let row, _, csvs = add_columns csvs in (r @ row, false, csv0 :: csvs) in let rec paste_rows csvs final_csv = let row, empty, csvs = add_columns csvs in if empty then List.rev final_csv else paste_rows csvs (row :: final_csv) in let csvs = List.map (Csv.load ~separator:input_sep) files in let csv = paste_rows csvs [] in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv (* Given [colspec1] and [colspec2], return an associative list that indicates the correspondence between the i th column specified by [colspec1] and the corresponding one in [colspec2]. *) let rec colspec_map colspec1 colspec2 = match colspec1 with | [] -> [] | Col i :: tl1 -> (match colspec2 with | Col k :: tl2 -> (i,k) :: colspec_map tl1 tl2 | Range(k,l) :: tl2 -> let colspec2 = if k < l then Range(k+1, l) :: tl2 else if k = l then tl2 else (* k > l *) Range(k-1, l) :: tl2 in (i,k) :: colspec_map tl1 colspec2 | ToEnd k :: _ -> (i, k) :: colspec_map tl1 [ToEnd(k+1)] | [] -> failwith "pastecol: the second range does not contain \ enough columns") | Range(i,j) :: tl1 -> let colspec1 = if i < j then Range(i+1, j) :: tl1 else if i = j then tl1 else (* i > j *) Range(i-1, j) :: tl1 in (match colspec2 with | Col k :: tl2 -> (i,k) :: colspec_map colspec1 tl2 | Range(k,l) :: tl2 -> let colspec2 = if k < l then Range(k+1, l) :: tl2 else if k = l then tl2 else (* k > l *) Range(k-1, l) :: tl2 in (i,k) :: colspec_map colspec1 colspec2 | ToEnd k :: _ -> (i,k) :: colspec_map colspec1 [ToEnd(k+1)] | [] -> failwith "pastecol: the second range does not contain \ enough columns") | ToEnd i :: _ -> let m = sprintf "pastecol: the first range cannot contain an open \ range like %i-" i in failwith m (* When several bindings are defined for an initial column, use the last one. 
ASSUME that the associative map is sorted w.r.t. the first data. *) let rec reduce_colspec_map = function | (i,_) :: (((j,_) :: _) as tl) when (i: int) = j -> reduce_colspec_map tl (* maybe (j,_) is also supplanted *) | m :: tl -> m :: reduce_colspec_map tl | [] -> [] let cmd_pastecol ~input_sep ~output_sep ~chan colspec1 colspec2 file1 file2 = let csv1 = Csv.load ~separator:input_sep file1 in let csv2 = Csv.load ~separator:input_sep file2 in let m = colspec_map colspec1 colspec2 in let m = List.stable_sort (fun (i,_) (j,_) -> compare (i:int) j) m in let m = reduce_colspec_map m in let rec update m curr_col row1 row2 = match m with | [] -> row1 (* substitutions exhausted *) | (i, j) :: m_tl -> let c, row1 = match row1 with | [] -> "", [] (* row exhausted but some remaining substitutions must be performed. Create new columns. *) | c :: row1_tl -> c, row1_tl in if curr_col = i then let c' = try List.nth row2 j with _ -> "" in c' :: update m_tl (curr_col + 1) row1 row2 else (* curr_col < i *) c :: update m (curr_col + 1) row1 row2 in let csv = List.map2 (update m 0) csv1 csv2 in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv let cmd_set_columns ~input_sep ~output_sep ~chan cols files = (* Avoid loading the whole file into memory. *) let f row = let csv = [row] in let csv = Csv.set_columns ~cols csv in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv in iter_csv_rows ~input_sep ~f files let cmd_set_rows ~input_sep ~output_sep ~chan rows files = let csv = List.concat (List.map (Csv.load ~separator:input_sep) files) in let csv = Csv.set_rows ~rows csv in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv let cmd_head ~input_sep ~output_sep ~chan rows files = (* Avoid loading the whole file into memory, or even loading * later files. 
*) let nr_rows = ref rows in let chan = Csv.to_channel ~separator:output_sep chan in let f row = if !nr_rows > 0 then ( decr nr_rows; Csv.output_record chan row ) else raise Exit in iter_csv_rows ~input_sep ~f files let cmd_drop ~input_sep ~output_sep ~chan rows files = (* Avoid loading the whole file into memory. *) let nr_rows = ref rows in let chan = Csv.to_channel ~separator:output_sep chan in let f row = if !nr_rows = 0 then Csv.output_record chan row else decr nr_rows in iter_csv_rows ~input_sep ~f files let cmd_square ~input_sep ~output_sep ~chan files = let csv = List.concat (List.map (Csv.load ~separator:input_sep) files) in let csv = Csv.square csv in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv let cmd_sub ~input_sep ~output_sep ~chan r c rows cols files = let csv = List.concat (List.map (Csv.load ~separator:input_sep) files) in let csv = Csv.sub ~r ~c ~rows ~cols csv in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv let cmd_replace ~input_sep ~output_sep ~chan colspec update files = let csv = List.concat (List.map (Csv.load ~separator:input_sep) files) in (* Load the update CSV file in. *) let update = Csv.load ~separator:input_sep update in (* Compare two rows for equality by considering only the columns * in colspec. *) let equal row1 row2 = let row1 = cols_of_colspec colspec row1 in let row2 = cols_of_colspec colspec row2 in 0 = Csv.compare [row1] [row2] in (* Look for rows in the original to be replaced by rows from the * update file. This is an ugly O(n^2) hack (XXX). 
*) let csv = List.filter ( fun row -> not (List.exists (equal row) update) ) csv in let csv = csv @ update in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv let cmd_transpose ~input_sep ~output_sep ~chan files = List.iter (fun file -> let tr = Csv.transpose (Csv.load ~separator:input_sep file) in Csv.output_all (Csv.to_channel ~separator:output_sep chan) tr ) files type format_el = String of string | Col of int let is_digit c = '0' <= c && c <= '9' (* Return the non-negative number starting at [i0 < i1 < len_s = String.length s] and the index of the first character after that number. It is expected that [s.[i0]] be a digit, otherwise [Failure] is be raised. *) let rec get_digit s len_s i0 i1 = if i1 < len_s then if is_digit s.[i1] then get_digit s len_s i0 (i1 + 1) else (int_of_string(String.sub s i0 (i1 - i0)), i1) else (* i0 < i1 (>)= len_s *) (int_of_string(String.sub s i0 (len_s - i0)), len_s) (* Prepend to the format [fmt] the substring s.[i0 .. i1 - 1] unless it is empty. 
*) let prepend_substring s i0 i1 fmt = if i0 < i1 then String(String.sub s i0 (i1 - i0)) :: fmt else (* i0 ≥ i1, empty substring *) fmt (* [i0 ≤ i1 ≤ len_s] *) let rec split_format s len_s i0 i1 = if i1 >= len_s then if i0 < len_s then [String(String.sub s i0 (len_s - i0))] else [] else if s.[i1] = '%' then let i2 = i1 + 1 in if i2 >= len_s then split_format s len_s i0 i2 (* consider a final '%' as a normal char *) else if is_digit s.[i2] then let col, i3 = get_digit s len_s i2 (i2 + 1) in prepend_substring s i0 i1 (Col col :: split_format s len_s i3 i3) else if s.[i2] = '(' then if i2 + 1 < len_s && is_digit s.[i2 + 1] then ( let col, i3 = get_digit s len_s (i2 + 1) (i2 + 2) in if i3 >= len_s || s.[i3] <> ')' then ( let r = String.sub s i1 (i3 - i1) in failwith(sprintf "Column format %S not terminated by ')'" r) ); prepend_substring s i0 i1 (Col col :: split_format s len_s (i3 + 1) (i3 + 1)) ) else failwith "Column format %( not followed by a number" else if s.[i2] = '%' then let i3 = i2 + 1 in String(String.sub s i0 (i1 - i0 + 1)) :: split_format s len_s i3 i3 else (* % + non-digit, consider it a literal '%' *) split_format s len_s i0 i2 else if s.[i1] = '\\' then (* Handle usual escapes. 
*) let i2 = i1 + 1 in if i2 >= len_s then split_format s len_s i0 i2 else if s.[i2] = 'n' then let i3 = i2 + 1 in prepend_substring s i0 i1 (String "\n" :: split_format s len_s i3 i3) else if s.[i2] = 'r' then let i3 = i2 + 1 in prepend_substring s i0 i1 (String "\r" :: split_format s len_s i3 i3) else if s.[i2] = 't' then let i3 = i2 + 1 in prepend_substring s i0 i1 (String "\t" :: split_format s len_s i3 i3) else split_format s len_s i0 i2 else split_format s len_s i0 (i1 + 1) let print_format row = function | String s -> print_string s | Col c -> try print_string (List.nth row (c - 1)) with _ -> () let cmd_format ~input_sep fmt files = let fmt = split_format fmt (String.length fmt) 0 0 in iter_csv_rows ~input_sep files ~f:(fun row -> List.iter (print_format row) fmt) let cmd_call ~input_sep command files = (* Avoid loading the whole file into memory. *) (* Use bash if it exists to enable the [command] to be an exported bash function. *) let want_bash = Sys.os_type = "Unix" && Sys.file_exists "/bin/bash" in let f row = let cmd = String.concat " " (command :: List.map Filename.quote row) in let cmd = if want_bash then "/bin/bash -c " ^ (Filename.quote cmd) else cmd in let code = Sys.command cmd in if code <> 0 then ( eprintf "%s: terminated with exit code %d\n" command code; exit code ) in iter_csv_rows ~input_sep ~f files let rec uniq = function | [] -> [] | [x] -> [x] | x :: y :: xs when Pervasives.compare x y = 0 -> uniq (x :: xs) | x :: y :: xs -> x :: uniq (y :: xs) let cmd_join ~input_sep ~output_sep ~chan colspec1 colspec2 files = (* Load in the files separately. *) let csvs = List.map (Csv.load ~separator:input_sep) files in (* For each CSV file, construct a hash table from row class (key) to * the (possibly empty) output columns (values). * Also construct a hash which has the unique list of row classes. 
*) let keys = Hashtbl.create 1023 in let hashes = List.map ( fun csv -> let hash = Hashtbl.create 1023 in List.iter ( fun row -> let key = cols_of_colspec colspec1 row in let value = cols_of_colspec colspec2 row in if not (Hashtbl.mem keys key) then Hashtbl.add keys key true; Hashtbl.add hash key value ) csv; hash ) csvs in (* Get the keys. *) let keys = Hashtbl.fold (fun key _ xs -> key :: xs) keys [] in let value_width = width_of_colspec colspec2 in let empty_value = List.hd (Csv.set_columns ~cols:value_width [[""]]) in let multiple_values = List.hd (Csv.set_columns ~cols:value_width [["!MULTIPLE VALUES"]]) in (* Generate output CSV. *) let keys = List.sort Pervasives.compare keys in let keys = List.map (fun key -> key, []) keys in let csv = List.fold_left ( fun keys hash -> List.map ( fun (key, values) -> let value = try Hashtbl.find_all hash key with Not_found -> [] in let value = match value with | [] -> empty_value | [value] -> value | _::_ -> multiple_values in key, (value :: values) ) keys ) keys hashes in let csv = List.map ( fun (key, values) -> key @ List.flatten (List.rev values) ) csv in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv let cmd_trim ~input_sep ~output_sep ~chan (top, left, right, bottom) files = let csv = List.concat (List.map (Csv.load ~separator:input_sep) files) in let csv = Csv.trim ~top ~left ~right ~bottom csv in Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv and trim_flags flags = let set c = try ignore (String.index flags c); true with Not_found -> false in let top = set 't' in let left = set 'l' in let right = set 'r' in let bottom = set 'b' in (top, left, right, bottom) (* Process the arguments. *) let usage = "csvtool - Copyright (C) 2005-2006 Richard W.M. Jones, Merjis Ltd. - Copyright (C) 2007- Richard W.M. Jones & Christophe Troestler csvtool is a tool for performing manipulations on CSV files from shell scripts. 
Summary: csvtool [-options] command [command-args] input.csv [input2.csv [...]] Commands: col Return one or more columns from the CSV file. For , see below. Example: csvtool col 1-3,6 input.csv > output.csv namedcol Assuming the first row of the CSV file is a list of column headings, this returned the column(s) with the named headings. is a comma-separated list of names. Example: csvtool namedcol Account,Cost input.csv > output.csv width Print the maximum width of the CSV file (number of columns in the widest row). height Print the number of rows in the CSV file. For most CSV files this is equivalent to 'wc -l', but note that some CSV files can contain a row which breaks over two (or more) lines. setcolumns cols Set the number of columns to cols (this also makes the CSV file square). Any short rows are padding with blank cells. Any long rows are truncated. setrows rows 'setrows n' sets the number of rows to 'n'. If there are fewer than 'n' rows in the CSV files, then empty blank lines are added. head rows take rows 'head n' and 'take n' (which are synonyms) take the first 'n' rows. If there are fewer than 'n' rows, padding is not added. drop rows Drop the first 'rows' rows and return the rest (if any). Example: To remove the headings from a CSV file with headings: csvtool drop 1 input.csv > output.csv To extract rows 11 through 20 from a file: csvtool drop 10 input.csv | csvtool take 10 - > output.csv cat This concatenates the input files together and writes them to the output. You can use this to change the separator character. Example: csvtool -t TAB -u COMMA cat input.tsv > output.csv paste Concatenate the columns of the files together and write them to the output. Example: csvtool paste input1.csv input2.csv > output.csv pastecol input.csv update.csv Replace the content of the columns referenced by in the file input.csv with the one of the corresponding column specified by in update.csv. 
Example: csvtool pastecol 2-3 1- input.csv update.csv.csv > output.csv join Join (collate) multiple CSV files together. controls which columns are compared. controls which columns are copied into the new file. Example: csvtool join 1 2 coll1.csv coll2.csv > output.csv In the above example, if coll1.csv contains: Computers,$40 Software,$100 and coll2.csv contains: Computers,$50 then the output will be: Computers,$40,$50 Software,$100, square Make the CSV square, so all rows have the same length. Example: csvtool square input.csv > input-square.csv trim [tlrb]+ Trim empty cells at the top/left/right/bottom of the CSV file. Example: csvtool trim t input.csv # trims empty rows at the top only csvtool trim tb input.csv # trims empty rows at the top & bottom csvtool trim lr input.csv # trims empty columns at left & right csvtool trim tlrb input.csv # trims empty rows/columns all around sub r c rows cols Take a square subset of the CSV, top left at row r, column c, which is rows deep and cols wide. 'r' and 'c' count from 1, or from 0 if -z option is given. replace update.csv original.csv Replace rows in original.csv with rows from update.csv. The columns in only are used to compare rows in input.csv and update.csv to see if they are candidates for replacement. Example: csvtool replace 3 updates.csv original.csv > new.csv mv new.csv original.csv transpose input.csv Transpose the lines and columns of the CSV file. format fmt Print each row of the files according to the format 'fmt'. Each occurrence of \"%i\" or \"%(i)\" (where 'i' is a number) in 'fmt' is replaced by the content of column number 'i' (remember that the leftmost column is numbered 1 in the traditional spreadsheet fashion). A literal percent is obtained by doubling it. The usual escape sequences \\n, \\r, and \\t are recognized. Example: csvtool format '%(1) -> %8%%\\n' input.csv call command This calls the external command (or shell function) 'command' followed by a parameter for each column in the CSV file. 
The external command is called once for each row in the CSV file. If any command returns a non-zero exit code then the whole program terminates. Tip: Use the shell command 'export -f funcname' to export a shell function for use as a command. Within the function, use the positional parameters $1, $2, ... to refer to the columns. Example (with a shell function): function test { echo Column 1: $1 echo Column 2: $2 } export -f test csvtool call test my.csv In the above example, if my.csv contains: how,now brown,cow then the output is: Column 1: how Column 2: now Column 1: brown Column 2: cow readable Print the input CSV in a readable format. Column specs: A is a comma-separated list of column numbers or column ranges. Examples: 1 Column 1 (the first, leftmost column) 2,5,7 Columns 2, 5 and 7 1-3,5 Columns 1, 2, 3 and 5 1,5- Columns 1, 5 and up. Columns are numbered starting from 1 unless the -z option is given. Input files: csvtool takes a list of input file(s) from the command line. If an input filename is '-' then take input from stdin. Output file: Normally the output is written to stdout. Use the -o option to override this. Separators: The default separator character is , (comma). To change this on input or output see the -t and -u options respectively. Use -t TAB or -u TAB (literally T-A-B!) to specify tab-separated files. Options:" let () = let input_sep = ref ',' in let set_input_sep = function | "TAB" -> input_sep := '\t' | "COMMA" -> input_sep := ',' | "SPACE" -> input_sep := ' ' | s -> input_sep := s.[0] in let output_sep = ref ',' in let set_output_sep = function | "TAB" -> output_sep := '\t' | "COMMA" -> output_sep := ',' | "SPACE" -> output_sep := ' ' | s -> output_sep := s.[0] in let count_zero = ref false in let output_file = ref "" in let rest = ref [] in let set_rest str = rest := str :: !rest in let argspec = [ "-t", Arg.String set_input_sep, "Input separator char. 
Use -t TAB for tab separated input."; "-u", Arg.String set_output_sep, "Output separator char. Use -u TAB for tab separated output."; "-o", Arg.Set_string output_file, "Write output to file (instead of stdout)"; "-z", Arg.Set count_zero, "Number columns from 0 instead of 1"; "-", Arg.Unit (fun () -> set_rest "-"), "" (* Hack to allow '-' for input from stdin. *) ] in Arg.parse argspec set_rest usage; let input_sep = !input_sep in let output_sep = !output_sep in let count_zero = !count_zero in let output_file = !output_file in let rest = List.rev !rest in (* Set up the output file. *) let chan = if output_file <> "" then open_out output_file else stdout in (match rest with | ("col"|"cols") :: colspec :: files -> let colspec = parse_colspec ~count_zero colspec in cmd_cols ~input_sep ~output_sep ~chan colspec files | ("namedcol"|"namedcols") :: names :: files -> let names = nsplit names "," in cmd_namedcols ~input_sep ~output_sep ~chan names files | ("width"|"columns") :: files -> cmd_width ~input_sep ~chan files | ("height"|"rows") :: files -> cmd_height ~input_sep ~chan files | "readable" :: files -> cmd_readable ~input_sep ~chan files | ("cat"|"concat") :: files -> cmd_cat ~input_sep ~output_sep ~chan files | "paste" :: files -> cmd_paste ~input_sep ~output_sep ~chan files | "pastecol" :: colspec1 :: colspec2 :: file1 :: file2 :: _ -> let colspec1 = parse_colspec ~count_zero colspec1 in let colspec2 = parse_colspec ~count_zero colspec2 in cmd_pastecol ~input_sep ~output_sep ~chan colspec1 colspec2 file1 file2 | ("join"|"collate") :: colspec1 :: colspec2 :: ((_::_::_) as files) -> let colspec1 = parse_colspec ~count_zero colspec1 in let colspec2 = parse_colspec ~count_zero colspec2 in cmd_join ~input_sep ~output_sep ~chan colspec1 colspec2 files | "square" :: files -> cmd_square ~input_sep ~output_sep ~chan files | "sub" :: r :: c :: rows :: cols :: files -> let r = int_of_string r in let r = if not count_zero then r-1 else r in let c = int_of_string c in let c = if 
not count_zero then c-1 else c in let rows = int_of_string rows in let cols = int_of_string cols in cmd_sub ~input_sep ~output_sep ~chan r c rows cols files | "replace" :: colspec :: update :: files -> let colspec = parse_colspec ~count_zero colspec in cmd_replace ~input_sep ~output_sep ~chan colspec update files | ("setcolumns"|"set_columns"|"set-columns"| "setcols"|"set_cols"|"set-cols") :: cols :: files -> let cols = int_of_string cols in cmd_set_columns ~input_sep ~output_sep ~chan cols files | ("setrows"|"set_rows"|"set-rows") :: rows :: files -> let rows = int_of_string rows in cmd_set_rows ~input_sep ~output_sep ~chan rows files | ("head"|"take") :: rows :: files -> let rows = int_of_string rows in cmd_head ~input_sep ~output_sep ~chan rows files | "drop" :: rows :: files -> let rows = int_of_string rows in cmd_drop ~input_sep ~output_sep ~chan rows files | "transpose" :: files -> cmd_transpose ~input_sep ~output_sep ~chan files | "format" :: fmt :: files -> cmd_format ~input_sep fmt files | "call" :: command :: files -> cmd_call ~input_sep command files | "trim" :: flags :: files -> let flags = trim_flags flags in cmd_trim ~input_sep ~output_sep ~chan flags files | _ -> prerr_endline (Sys.executable_name ^ " --help for usage"); exit 2 ); if output_file <> "" then close_out chan ocaml-csv-1.4.2/examples/example.ml000066400000000000000000000026461261026267100172200ustar00rootroot00000000000000(* See also 'test.ml' for examples, and 'csv.mli' for documentation. 
*) open Printf let embedded_csv = "\ \"Banner clickins\" \"Clickin\",\"Number\",\"Percentage\", \"brand.adwords\",\"4,878\",\"14.4\" \"vacation.advert2.adwords\",\"4,454\",\"13.1\" \"affiliates.generic.tc1\",\"1,608\",\"4.7\" \"brand.overture\",\"1,576\",\"4.6\" \"vacation.cheap.adwords\",\"1,515\",\"4.5\" \"affiliates.generic.vacation.biggestchoice\",\"1,072\",\"3.2\" \"breaks.no-destination.adwords\",\"1,015\",\"3.0\" \"fly.no-destination.flightshome.adwords\",\"833\",\"2.5\" \"exchange.adwords\",\"728\",\"2.1\" \"holidays.cyprus.cheap\",\"574\",\"1.7\" \"travel.adwords\",\"416\",\"1.2\" \"affiliates.vacation.generic.onlinediscount.200\",\"406\",\"1.2\" \"promo.home.topX.ACE.189\",\"373\",\"1.1\" \"homepage.hp_tx1b_20050126\",\"369\",\"1.1\" \"travel.agents.adwords\",\"358\",\"1.1\" \"promo.home.topX.SSH.366\",\"310\",\"0.9\"" let csvs = List.map (fun name -> name, Csv.load name) [ "examples/example1.csv"; "examples/example2.csv" ] let () = let ecsv = Csv.input_all(Csv.of_string embedded_csv) in printf "---Embedded CSV---------------------------------\n" ; Csv.print_readable ecsv; List.iter ( fun (name, csv) -> printf "---%s----------------------------------------\n" name; Csv.print_readable csv ) csvs; printf "Compare (Embedded CSV) example1.csv = %i\n" (Csv.compare ecsv (snd(List.hd csvs))) ocaml-csv-1.4.2/examples/example1.csv000066400000000000000000000012721261026267100174560ustar00rootroot00000000000000"Banner clickins" "Clickin","Number","Percentage", "brand.adwords","4,878","14.4" "vacation.advert2.adwords","4,454","13.1" "affiliates.generic.tc1","1,608","4.7" "brand.overture","1,576","4.6" "vacation.cheap.adwords","1,515","4.5" "affiliates.generic.vacation.biggestchoice","1,072","3.2" "breaks.no-destination.adwords","1,015","3.0" "fly.no-destination.flightshome.adwords","833","2.5" "exchange.adwords","728","2.1" "holidays.cyprus.cheap","574","1.7" "travel.adwords","416","1.2" "affiliates.vacation.generic.onlinediscount.200","406","1.2" 
"promo.home.topX.ACE.189","373","1.1" "homepage.hp_tx1b_20050126","369","1.1" "travel.agents.adwords","358","1.1" "promo.home.topX.SSH.366","310","0.9"ocaml-csv-1.4.2/examples/example2.csv000066400000000000000000000003531261026267100174560ustar00rootroot00000000000000"Visitors per search engine" "Search engine","Number", "Google","15,437" "MSN","2,372" "AOLSearch","1,885" "Yahoo","1,555" "Ask-Jeeves","998" "Ntlworld","303" "Myway","268" "Myway.com","169" "Freeserve","73" "BBC","41" "Altavista","26"ocaml-csv-1.4.2/src/000077500000000000000000000000001261026267100141745ustar00rootroot00000000000000ocaml-csv-1.4.2/src/Makefile000066400000000000000000000016571261026267100156450ustar00rootroot00000000000000ROOT=.. include $(ROOT)/Makefile.conf # Native code versions must be installed if and only if make opt was # called previously (i.e. the files exist): INSTALL_FILES = csv.mli $(wildcard *.cmi *.cma *.cmx *.a *.cmxa) .PHONY: default all opt byte native default: opt all: byte opt: native byte: csv.cma native: csv.cmxa csv.cma: csv.cmo csv.cmxa: csv.cmx .PHONY: doc doc: test -f $(DOC_DIR) || mkdir -p $(DOC_DIR) $(OCAMLDOC) -html -d $(DOC_DIR) $(OCAMLDOC_FLAGS) $(wildcard *.mli) .PHONY: install uninstall # TODO: copy HTML doc install: META $(INSTALL_FILES) ocamlfind install $(PACKAGE) $^ uninstall: ocamlfind remove $(PACKAGE) META: META.in @ sed -e "s/@PACKAGE@/$(PACKAGE)/" $< \ | sed -e "s/@VERSION@/$(VERSION)/" > $@ @ echo "Updated \"$@\"." 
# Make.bat -- easy compilation on win32 Make.bat: $(MAKE) clean # Filter out all "make" messages $(MAKE) all | grep --invert-match "make" > $@ include $(ROOT)/Makefile.ocaml ocaml-csv-1.4.2/src/csv.ml000066400000000000000000001111131261026267100153170ustar00rootroot00000000000000(* File: csv.ml Copyright (C) 2005-2009 Richard Jones email: rjones@redhat.com Christophe Troestler email: Christophe.Troestler@umons.ac.be WWW: http://math.umons.ac.be/anum/software/ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 or later as published by the Free Software Foundation, with the special exception on linking described in the file LICENSE. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file LICENSE for more details. *) (* MOTIVATION. There are already several solutions to parse CSV files in OCaml. They did not suit my needs however because: - The files I need to deal with have a header which does not always reflect the data structure (say the first row are the names of neurones but there are two columns per name). In other words I want to be able to deal with heterogeneous files. - I do not want to read the the whole file at once (I may but I just want to be able to choose). Higher order functions like fold are fine provided the read stops at the line an exception is raised (so it can be reread again). - For similarly encoded line, being able to specify once a decoder and then use a type safe version would be nice. - Speed is not neglected (we would like to be able to parse a ~2.5Mb file under 0.1 sec on my machine (2GHz Core Duo)). We follow the CVS format documentation available at http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm *) type t = string list list (* Specialize [min] to integers for performance reasons (> 150% faster). 
*) let min x y = if (x:int) <= y then x else y let max x y = if (x:int) >= y then x else y (* Add Buffer.add_subbytes for all compiler versions. Copied from the OCaml stdlib. *) module Buffer = struct include Buffer let add_subbytes b s offset len = add_substring b (Bytes.unsafe_to_string s) offset len end (* Enhance the List module with tail rec functions. *) module List = struct include List (* Implementation of [map] in JSC Core. *) let map_slow l ~f = List.rev (List.rev_map f l) let rec count_map ~f l ctr = match l with | [] -> [] | [x1] -> let f1 = f x1 in [f1] | [x1; x2] -> let f1 = f x1 in let f2 = f x2 in [f1; f2] | [x1; x2; x3] -> let f1 = f x1 in let f2 = f x2 in let f3 = f x3 in [f1; f2; f3] | [x1; x2; x3; x4] -> let f1 = f x1 in let f2 = f x2 in let f3 = f x3 in let f4 = f x4 in [f1; f2; f3; f4] | x1 :: x2 :: x3 :: x4 :: x5 :: tl -> let f1 = f x1 in let f2 = f x2 in let f3 = f x3 in let f4 = f x4 in let f5 = f x5 in f1 :: f2 :: f3 :: f4 :: f5 :: (if ctr > 1000 then map_slow ~f tl else count_map ~f tl (ctr + 1)) let map f l = count_map ~f l 0 (* Implementation of [append] in JSC core. *) let slow_append l1 l2 = List.rev_append (List.rev l1) l2 let rec count_append l1 l2 count = match l1 with | [] -> l2 | [x1] -> x1 :: l2 | [x1; x2] -> x1 :: x2 :: l2 | [x1; x2; x3] -> x1 :: x2 :: x3 :: l2 | [x1; x2; x3; x4] -> x1 :: x2 :: x3 :: x4 :: l2 | x1 :: x2 :: x3 :: x4 :: x5 :: tl -> x1 :: x2 :: x3 :: x4 :: x5 :: (if count > 1000 then slow_append tl l2 else count_append tl l2 (count + 1)) let append l1 l2 = count_append l1 l2 0 (* Tail recursive [combine]. 
*)
  (* [rev_combine acc l1 l2] pushes the pairs of [l1] and [l2] onto [acc],
     so the result is in reverse order.
     @raise Invalid_argument if the lists have different lengths. *)
  let rec rev_combine acc l1 l2 =
    match l1, l2 with
    | ([], []) -> acc
    | (a1::l1, a2::l2) -> rev_combine ((a1, a2) :: acc) l1 l2
    | (_, _) -> invalid_arg "List.combine"

  (* [slow_combine l1 l2] pairs the two lists using [rev_combine] and then
     restores the order. *)
  let slow_combine l1 l2 = List.rev (rev_combine [] l1 l2)

  (* [count_combine l1 l2 count] pairs up to four elements per call; after
     more than 1000 calls the remainder is handled by [slow_combine].
     @raise Invalid_argument if the lists have different lengths. *)
  let rec count_combine l1 l2 count =
    match l1, l2 with
    | ([], []) -> []
    | ([x1], [y1]) -> [x1, y1]
    | ([x1; x2], [y1; y2]) -> [x1, y1; x2, y2]
    | ([x1; x2; x3], [y1; y2; y3]) -> [x1, y1; x2, y2; x3, y3]
    | ([x1; x2; x3; x4], [y1; y2; y3; y4]) ->
       [x1, y1; x2, y2; x3, y3; x4, y4]
    | (x1 :: x2 :: x3 :: x4 :: tl1), (y1 :: y2 :: y3 :: y4 :: tl2) ->
       (x1, y1) :: (x2, y2) :: (x3, y3) :: (x4, y4) ::
         (if count > 1000 then slow_combine tl1 tl2
          else count_combine tl1 tl2 (count + 1))
    | (_, _) -> invalid_arg "List.combine"

  (* [combine l1 l2]: replacement for the standard [List.combine]. *)
  let combine l1 l2 = count_combine l1 l2 0
end

(* Minimal input object: [input buf ofs len] returns an [int] and
   [close_in ()] returns unit. *)
class type in_obj_channel =
object
  method input : Bytes.t -> int -> int -> int
  method close_in : unit -> unit
end

(* Minimal output object: [output s ofs len] returns an [int] and
   [close_out ()] returns unit. *)
class type out_obj_channel =
object
  method output : Bytes.t -> int -> int -> int
  method close_out : unit -> unit
end

(*
 * Representation of rows accessible by both keys and numbers
 *)

module Header = struct
  module M = Map.Make(String)

  type t = { names : string array;
             index : int M.t }
  (* This is a correspondence between names and column numbers, in
     both directions.  Names "" are not active and must not be in the
     index. *)

  (* The header with no column. *)
  let empty = { names = [| |];  index = M.empty }

  (* [get t i] is the name of column [i], or [""] when the array access
     fails (any exception is mapped to [""]). *)
  let get t i = try t.names.(i) with _ -> ""

  (* [find t name] is the column number bound to [name].
     @raise Not_found if [name] is not in the index. *)
  let find t name = M.find name t.index

  (* [of_names names] builds a header from a list of names.  Empty names
     are not indexed; when a name occurs again, the later occurrence is
     replaced by [""] so that the first one wins. *)
  let of_names names =
    let names = Array.of_list names in
    let index = ref M.empty in
    for i = 0 to Array.length names - 1 do
      if names.(i) <> "" then
        if M.mem names.(i) !index then
          names.(i) <- "" (* remove duplicate binding *)
        else
          index := M.add names.(i) i !index
    done;
    { names;  index = !index }

  (* [names t] is the list of column names, in column order. *)
  let names t = Array.to_list t.names

  (* [main] names take precedence over [t] ones.
*)
  (* [merge ~main t] combines two headers column by column.  A column
     keeps its [main] name when that name is not [""]; otherwise it takes
     the name found in [t], provided this name is not [""] and is not
     already bound in the index (so a [main] binding is never overridden).
     The result has as many columns as the longer of the two headers. *)
  let merge ~main t =
    let index = ref main.index in
    if Array.length main.names >= Array.length t.names then (
      (* Start from the [main] names and only fill the unnamed columns. *)
      let names = Array.copy main.names in
      for i = 0 to Array.length t.names - 1 do
        if names.(i) = "" && t.names.(i) <> ""
           && not(M.mem t.names.(i) !index) then (
          names.(i) <- t.names.(i);
          index := M.add names.(i) i !index
        )
      done;
      { names;  index = !index }
    )
    else (
      let names = Array.make (Array.length t.names) "" in
      (* Columns present in both headers. *)
      for i = 0 to Array.length main.names - 1 do
        if main.names.(i) <> "" then names.(i) <- main.names.(i)
        else if t.names.(i) <> "" && not(M.mem t.names.(i) !index) then (
          names.(i) <- t.names.(i);
          index := M.add names.(i) i !index
        )
      done;
      (* Columns that only exist in [t].  The loop must start right after
         the last [main] column, and a name already bound (by [main] or by
         an earlier column) must not be rebound. *)
      for i = Array.length main.names to Array.length names - 1 do
        if t.names.(i) <> "" && not(M.mem t.names.(i) !index) then (
          names.(i) <- t.names.(i);
          index := M.add names.(i) i !index
        )
      done;
      { names;  index = !index }
    )
end

module Row = struct
  (* Datastructure with double access (integer and key). *)
  type t = { header : Header.t;  row: string array }

  (* [make header row] pairs the fields [row] with [header]. *)
  let make header row = { header;  row = Array.of_list row }

  (* [get t i] is the field number [i], or [""] if the access fails. *)
  let get t i = try t.row.(i) with _ -> ""

  (* [find t key] is the field of the column named [key], or [""] if the
     name is unknown or the row has no such column. *)
  let find t key =
    try t.row.(Header.find t.header key)
    with _ -> ""

  (* [to_list t] is the list of fields, in column order. *)
  let to_list t = Array.to_list t.row

  (* [to_assoc t] is the list of (column name, field) pairs, in column
     order; columns without a name are paired with [""]. *)
  let to_assoc t =
    let l = ref [] in
    for i = Array.length t.row - 1 downto 0 do
      l := (Header.get t.header i, t.row.(i)) :: !l
    done;
    !l

  (* [with_header t h] returns [t] with the names [h] merged over its
     current header (the names in [h] take precedence). *)
  let with_header t h =
    let h = Header.of_names h in
    { t with header = Header.merge ~main:h t.header }
end

(*
 * Input
 *)

exception Failure of int * int * string

let buffer_len = 0x1FFF

(* We buffer the input as this allows us to be efficient while using
   very basic input channels.  The drawback is that if we want to use
   another tool, there will be data held in the buffer.  That is why
   we allow to convert a CSV handle to an object sharing the same
   buffer.  Because of this, we actually decided to implement the CSV
   handle as an object that is coercible to an input-object.

   FIXME: This is not made for non-blocking channels.  Can we fix it?
*)
type in_channel = {
  in_chan : in_obj_channel; (* the underlying input object *)
  in_buf : Bytes.t;
  (* The data in the in_buf is at indexes i s.t. in0 <= i < in1.
     Invariant: 0 <= in0 ; in1 <= buffer_len
     in1 < 0 indicates a closed channel. *)
  mutable in0 : int;
  mutable in1 : int;
  mutable end_of_file : bool;
  (* If we encounter an End_of_file exception, we set this flag to
     avoid reading again because we do not know how the channel will
     react past an end of file.  That allows us to assume that
     reading past an end of file will keep raising End_of_file. *)
  current_field : Buffer.t; (* buffer reused to scan fields *)
  mutable record : string list; (* The current record *)
  mutable record_n : int; (* For error messages *)
  has_header : bool;
  header : Header.t;
  (* Convert the rows on demand (=> do not pay the price if one does
     not use that feature). *)
  separator : char; (* the character that separates two fields *)
  backslash_escape : bool; (* Whether \x is considered as an escape *)
  excel_tricks : bool;
  (* Whitespace related stripping functions: *)
  is_space : char -> bool;
  lstrip_buffer : Buffer.t -> unit;
  rstrip_substring : Bytes.t -> int -> int -> string;
  rstrip_contents : Buffer.t -> string;
}

(*
 * Helpers for input
 *)

let is_space_or_tab c = c = ' ' || c = '\t' (* See documentation *)
let is_real_space c = c = ' ' (* when separator = '\t' *)

(* Given a buffer, returns its content stripped of *final* whitespace. *)
let rstrip_contents buf =
  (* [n] ends up as the index of the last char that is neither a space
     nor a tab, or -1 if there is none. *)
  let n = ref(Buffer.length buf - 1) in
  while !n >= 0 && is_space_or_tab(Buffer.nth buf !n) do decr n done;
  Buffer.sub buf 0 (!n + 1)

(* Return the substring after stripping its final space.  It is
   assumed the substring parameters are valid. *)
let rstrip_substring buf ofs len =
  (* No bounds check is performed ([unsafe_get]), hence the assumption
     above. *)
  let n = ref(ofs + len - 1) in
  while !n >= ofs && is_space_or_tab(Bytes.unsafe_get buf !n) do decr n done;
  Bytes.sub_string buf ofs (!n - ofs + 1)

(* Ignores its argument. *)
let do_nothing _ = ()

(* [fill_in_buf_or_Eof chan] refills in_buf if needed (when empty).
After this [in0 < in1] or [in0 = in1 = 0], the latter indicating
   that there are currently no bytes to read (for a non-blocking channel).

   @raise End_of_file if there are no more bytes to read. *)
let fill_in_buf_or_Eof ic =
  (* Once the end of file was seen, never call the channel again. *)
  if ic.end_of_file then raise End_of_file;
  if ic.in0 >= ic.in1 then begin
    try
      ic.in0 <- 0;
      ic.in1 <- ic.in_chan#input ic.in_buf 0 buffer_len;
    with End_of_file ->
      ic.end_of_file <- true;
      raise End_of_file
  end


(*
 * CSV input format parsing
 *)

(* Array: char escaped with '\\' → char.
   Keep in sync with [escape]. *)
let unescape =
  (* The table is indexed by the code of the char following the
     backslash. *)
  let escaped_by c =
    match Char.unsafe_chr c with
    | '0' -> '\000' (* \0 = NULL *)
    | 'b' -> '\b'
    | 'n' -> '\n'
    | 'r' -> '\r'
    | 't' -> '\t'
    | 'Z' -> '\026' (* Ctrl + Z, used by MySQL. *)
    | c -> c (* unchanged *) in
  Array.init 256 escaped_by

(* Skip the possible '\n' following a '\r'.  Reaching End_of_file is
   not considered an error -- just do nothing. *)
let skip_CR ic =
  try
    fill_in_buf_or_Eof ic;
    (* NOTE(review): this reads [in_buf] at [in0] without checking
       [in0 < in1]; presumably the case [in0 = in1 = 0] of a non-blocking
       channel is not handled here -- confirm. *)
    if Bytes.unsafe_get ic.in_buf ic.in0 = '\n' then ic.in0 <- ic.in0 + 1
  with End_of_file -> ()


(* Unquoted field.  Read till a delimiter, a newline, or the
   end of the file.  Skip the next delimiter or newline.
   @return [true] if more fields follow, [false] if the record is complete.
*)
(* [i] is the index in [in_buf] of the next char to examine; the chars of
   the field that are not yet saved in [current_field] are those at
   indexes [in0 .. i-1]. *)
let rec seek_unquoted_separator ic i =
  if i >= ic.in1 then (
    (* End not found, need to look at the next chunk *)
    Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
    ic.in0 <- i;
    fill_in_buf_or_Eof ic;
    seek_unquoted_separator ic 0
  )
  else
    let c = Bytes.unsafe_get ic.in_buf i in
    if c = ic.separator || c = '\n' || c = '\r' then (
      if Buffer.length ic.current_field = 0 then
        (* Avoid copying the string to the buffer if unnecessary *)
        ic.record <- ic.rstrip_substring ic.in_buf ic.in0 (i - ic.in0)
                     :: ic.record
      else (
        Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
        ic.record <- ic.rstrip_contents ic.current_field :: ic.record
      );
      (* Skip the delimiter; more fields follow only after a separator. *)
      ic.in0 <- i + 1;
      if c = '\r' then (skip_CR ic; false) else (c = ic.separator)
    )
    else seek_unquoted_separator ic (i+1)

(* [add_unquoted_field ic] adds the unquoted field starting at [in0] to
   the record.  When the end of the file is reached, what was accumulated
   in [current_field] is added as the last field of the record. *)
let add_unquoted_field ic =
  try seek_unquoted_separator ic ic.in0
  with End_of_file ->
    ic.record <- ic.rstrip_contents ic.current_field :: ic.record;
    false

(* Quoted field closed.  Read past a separator or a newline and decode
   the field or raise [End_of_file].
   @return [true] if more fields follow, [false] if the record is complete.
*)
(* @raise Failure if a char other than a space or a tab is found before
   the separator or the newline. *)
let rec seek_quoted_separator ic field_no =
  fill_in_buf_or_Eof ic;
  let c = Bytes.unsafe_get ic.in_buf ic.in0 in
  ic.in0 <- ic.in0 + 1;
  if c = ic.separator || c = '\n' || c = '\r' then (
    ic.record <- Buffer.contents ic.current_field :: ic.record;
    if c = '\r' then (skip_CR ic; false) else (c = ic.separator)
  )
  else if is_space_or_tab c then seek_quoted_separator ic field_no (* skip space *)
  else raise(Failure(ic.record_n, field_no,
                     "Non-space char after closing the quoted field"))

(* [examine_quoted_field ic field_no after_quote i] scans the inside of a
   quoted field, [i] being the index in [in_buf] of the next char to
   examine.  The chars at indexes [in0 .. i-1] belong to the field but are
   not yet saved in [current_field].  [after_quote] is set to [true] when
   a double quote was just read and back to [false] when that quote
   turned out to be part of an escape; [add_quoted_field] reads it when
   the end of the file is reached.
   @return [true] if more fields follow, [false] if the record is complete.
   @raise Failure on a double quote followed by an unexpected char.
   @raise End_of_file if the input ends before the field is terminated. *)
let rec examine_quoted_field ic field_no after_quote i =
  if i >= ic.in1 then (
    (* End of field not found, need to look at the next chunk *)
    Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
    ic.in0 <- i;
    fill_in_buf_or_Eof ic;
    examine_quoted_field ic field_no after_quote 0
  )
  else
    let c = Bytes.unsafe_get ic.in_buf i in
    if c = '\"' then (
      after_quote := true;
      (* Save the field so far, without the quote *)
      Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
      ic.in0 <- i + 1; (* skip the quote *)
      (* The field up to [ic.in0] is saved, can refill if needed. *)
      fill_in_buf_or_Eof ic; (* possibly update [ic.in0] *)
      let c = Bytes.unsafe_get ic.in_buf ic.in0 in
      if c = '\"' then (
        after_quote := false;
        (* [c] is kept so a quote will be included in the field *)
        examine_quoted_field ic field_no after_quote (ic.in0 + 1)
      )
      else if c = ic.separator || is_space_or_tab c || c = '\n' || c = '\r' then (
        seek_quoted_separator ic field_no (* field already saved;
                                             after_quote=true *)
      )
      else if ic.excel_tricks && c = '0' then (
        (* Supposedly, '"' '0' means ASCII NULL *)
        after_quote := false;
        Buffer.add_char ic.current_field '\000';
        ic.in0 <- ic.in0 + 1; (* skip the '0' *)
        examine_quoted_field ic field_no after_quote ic.in0
      )
      else raise(Failure(ic.record_n, field_no, "Bad '\"' in quoted field"))
    )
    else if ic.backslash_escape && c = '\\' then (
      (* Save the field so far, without the backslash: *)
      Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
      ic.in0 <- i + 1; (* skip the backslash *)
      fill_in_buf_or_Eof ic; (* possibly update [ic.in0] *)
      let c = Bytes.unsafe_get ic.in_buf ic.in0 in
      (* Decode the escaped char through the [unescape] table. *)
      Buffer.add_char ic.current_field unescape.(Char.code c);
      ic.in0 <- ic.in0 + 1; (* skip the char [c]. *)
      examine_quoted_field ic field_no after_quote ic.in0
    )
    else examine_quoted_field ic field_no after_quote (i+1)

(* [add_quoted_field ic field_no] adds to the record the quoted field
   whose content starts at [in0] (the opening quote is already skipped).
   @return [true] if more fields follow, [false] if the record is complete.
   @raise Failure if the end of the file is reached inside the quotes
   (the unterminated field is nevertheless added to the record). *)
let add_quoted_field ic field_no =
  let after_quote = ref false in (* preserved through exn *)
  try examine_quoted_field ic field_no after_quote ic.in0
  with End_of_file ->
    (* Add the field even if not closed well *)
    ic.record <- Buffer.contents ic.current_field :: ic.record;
    if !after_quote then false (* = record is complete *)
    else raise(Failure(ic.record_n, field_no,
                       "Quoted field closed by end of file"))


(* [add_from_in_buf ic is_space] moves to [current_field] the chars
   satisfying [is_space] found from [in0] on; it stops at the first other
   char or at the end of the buffered data. *)
let rec add_from_in_buf ic is_space =
  if ic.in0 < ic.in1 then
    let c = Bytes.unsafe_get ic.in_buf ic.in0 in
    if is_space c then (
      Buffer.add_char ic.current_field c;
      ic.in0 <- ic.in0 + 1;
      add_from_in_buf ic is_space
    )

let add_spaces ic =
  (* Skip spaces: after this [in0] is a non-space char.
*)
  add_from_in_buf ic ic.is_space;
  while ic.in0 >= ic.in1 do
    fill_in_buf_or_Eof ic;
    add_from_in_buf ic ic.is_space;
  done

(* We suppose to be at the beginning of a field.  Add the next field
   to [record].  Reaching the end of the file is not an error here: the
   field read so far (possibly empty) is added and [false] is returned.
   @return [true] if more fields follow, [false] if the record is complete.
   @raise Failure if there is a format error in a quoted field. *)
let add_next_field ic field_no =
  Buffer.clear ic.current_field;
  try
    add_spaces ic;
    (* Now, in0 < in1 or End_of_file was raised *)
    let c = Bytes.unsafe_get ic.in_buf ic.in0 in
    if c = '\"' then (
      ic.in0 <- ic.in0 + 1;
      Buffer.clear ic.current_field; (* remove spaces *)
      add_quoted_field ic field_no
    )
    else if ic.excel_tricks && c = '=' then (
      ic.in0 <- ic.in0 + 1; (* mark '=' as read *)
      try
        fill_in_buf_or_Eof ic;
        if Bytes.unsafe_get ic.in_buf ic.in0 = '\"' then (
          (* Excel trick ="..." to prevent spaces around the field
             to be removed. *)
          ic.in0 <- ic.in0 + 1; (* skip '"' *)
          add_quoted_field ic field_no
        )
        else (
          ic.lstrip_buffer ic.current_field; (* remove spaces *)
          Buffer.add_char ic.current_field '=';
          add_unquoted_field ic
        )
      with End_of_file ->
        (* The '=' was the very last char of the input. *)
        ic.record <- "=" :: ic.record;
        false
    )
    else (
      ic.lstrip_buffer ic.current_field; (* remove spaces *)
      add_unquoted_field ic
    )
  with End_of_file ->
    (* If it is the first field, coming from [next()], the field is
       made of spaces.  If after the first, we are sure we read a
       delimiter before (but maybe the field is empty).  Thus add an
       empty field. *)
    ic.record <- "" :: ic.record;
    false

(* [next ic] reads the next record and returns its fields in order.
   @raise End_of_file if there is no more record to read.
   @raise Failure on a format error; in that case the fields read so far
   are kept in [ic.record], in order.
   @raise Sys_error if the channel was closed. *)
let next ic =
  if ic.in1 < 0 then raise(Sys_error "Bad file descriptor");
  fill_in_buf_or_Eof ic; (* End_of_file means no more records *)
  ic.record <- [];
  ic.record_n <- ic.record_n + 1; (* the current line being read *)
  let more_fields = ref true
  and field_no = ref 1 in (* the current field being read *)
  (try
     while !more_fields do
       more_fields := add_next_field ic !field_no;
       incr field_no;
     done
   with Failure _ as e ->
     (* The fields are accumulated in reverse order: put the partial
        record in reading order before reporting the error, so that
        [current_record] is usable. *)
     ic.record <- List.rev ic.record;
     raise e);
  ic.record <- List.rev ic.record;
  ic.record

(* [current_record ic] is the last record read (or being read). *)
let current_record ic = ic.record

(* [fold_left ~f ~init ic] folds [f] over all remaining records. *)
let fold_left ~f ~init:a0 ic =
  let a = ref a0 in
  try
    while true do
      a := f !a (next ic)
    done;
    assert false
  with End_of_file -> !a

(* [iter ~f ic] applies [f] to all remaining records. *)
let iter ~f ic =
  try  while true do f (next ic) done;
  with End_of_file -> ()

(* [input_all ic] is the list of all remaining records, in order. *)
let input_all ic =
  List.rev(fold_left ~f:(fun l r -> r :: l) ~init:[] ic)

(* [fold_right ~f ic a0] folds [f] over all remaining records, starting
   with the last one. *)
let fold_right ~f ic a0 =
  (* We need to collect all records before applying [f] -- last row first. *)
  let lr = fold_left ~f:(fun l r -> r :: l) ~init:[] ic in
  List.fold_left (fun a r -> f r a) a0 lr

(*
 * Creating a handle, possibly with header
 *)

let of_in_obj ?(separator=',') ?(strip=true) ?(has_header=false) ?header
              ?(backslash_escape=false) ?(excel_tricks=true) in_chan =
  let ic = {
      in_chan = in_chan;
      in_buf = Bytes.create buffer_len;
      in0 = 0;
      in1 = 0;
      end_of_file = false;
      current_field = Buffer.create 0xFF;
      record = [];
      record_n = 0; (* => first record numbered 1 *)
      has_header = has_header || header <> None;
      header = Header.empty;
      separator = separator;
      backslash_escape;
      excel_tricks = excel_tricks;
      (* Stripping *)
      is_space = (if separator = '\t' then is_real_space else is_space_or_tab);
      lstrip_buffer = (if strip then Buffer.clear else do_nothing);
      rstrip_substring = (if strip then rstrip_substring else Bytes.sub_string);
      rstrip_contents = (if strip then rstrip_contents else Buffer.contents);
    } in
  if has_header then (
    (* Try to initialize headers with the first record that is read.
*)
    try
      let names = next ic in
      let h = Header.of_names names in
      let h = match header with
        | None -> h
        | Some h0 -> Header.merge ~main:(Header.of_names h0) h in
      { ic with header = h }
    with End_of_file | Failure _ ->
      (* The first record could not be read.  Do not lose the names the
         caller supplied explicitly. *)
      (match header with
       | None -> ic
       | Some h0 -> { ic with header = Header.of_names h0 })
  )
  else (
    (* The channel does not contain a header. *)
    match header with
    | None -> ic
    | Some h0 -> { ic with header = Header.of_names h0 }
  )

(* Same as [of_in_obj] for a standard input channel.  A read returning 0
   bytes is turned into [End_of_file] and [Sys_blocked_io] into a read of
   0 bytes, as [fill_in_buf_or_Eof] expects. *)
let of_channel ?separator ?strip ?has_header ?header
               ?backslash_escape ?excel_tricks fh =
  of_in_obj ?separator ?strip ?has_header ?header
            ?backslash_escape ?excel_tricks
    (object
       val fh = fh
       method input s ofs len =
         try
           let r = Pervasives.input fh s ofs len in
           if r = 0 then raise End_of_file;
           r
         with Sys_blocked_io -> 0
       method close_in() = Pervasives.close_in fh
     end)

(* Same as [of_in_obj], the data being the content of the string [str]. *)
let of_string ?separator ?strip ?has_header ?header
              ?backslash_escape ?excel_tricks str =
  of_in_obj ?separator ?strip ?has_header ?header
            ?backslash_escape ?excel_tricks
    (object
       val mutable position = 0 (* index in [str] of the next char to give *)
       method input buf ofs len =
         if position >= String.length str
         then raise End_of_file
         else (
           let actual = min len (String.length str - position) in
           String.blit str position buf ofs actual ;
           position <- position + actual ;
           actual
         )
       method close_in() = ()
     end)

(* [close_in ic] marks [ic] as closed ([in1 < 0]) and closes the
   underlying channel.  Closing an already closed handle does nothing. *)
let close_in ic =
  if ic.in1 >= 0 then begin
    ic.in0 <- 0;
    ic.in1 <- -1;
    ic.in_chan#close_in(); (* may raise an exception *)
  end

(* [to_in_obj ic] is an input object reading the raw bytes of [ic],
   starting with those already held in the buffer of [ic]. *)
let to_in_obj ic = object
  val ic = ic

  method input buf ofs len =
    if ofs < 0 || len < 0 || ofs + len > Bytes.length buf
    then invalid_arg "Csv.to_in_obj#input";
    if ic.in1 < 0 then raise(Sys_error "Bad file descriptor");
    fill_in_buf_or_Eof ic;
    let r = min len (ic.in1 - ic.in0) in
    Bytes.blit ic.in_buf ic.in0 buf ofs r;
    ic.in0 <- ic.in0 + r;
    r

  method close_in() = close_in ic
end

(* [load fname] reads all the records of the file [fname] ([stdin] when
   [fname] is ["-"]) and closes the channel, including when reading fails
   with an exception. *)
let load ?separator ?strip ?backslash_escape ?excel_tricks fname =
  let fh = if fname = "-" then stdin else open_in fname in
  let csv = of_channel ?separator ?strip ?backslash_escape ?excel_tricks fh in
  let t =
    try input_all csv
    with e ->
      (* Do not leak the file descriptor on a parse or I/O error; an
         error while closing must not hide the original exception. *)
      (try close_in csv with _ -> ());
      raise e in
  close_in csv;
  t

let load_in ?separator ?strip ?backslash_escape ?excel_tricks ch =
input_all (of_channel ?separator ?strip ?backslash_escape ?excel_tricks ch)

(* @deprecated *)
let load_rows ?separator ?strip ?backslash_escape ?excel_tricks f ch =
  let csv = of_channel ?separator ?strip ?backslash_escape ?excel_tricks ch in
  iter ~f csv

(*
 * Output
 *)

(* Arrays for backslash-escaping. *)
(* [must_escape.(code)] tells whether the char of that code is written
   as a backslash escape. *)
let must_escape = Array.make 256 false
let () =
  let mark c = must_escape.(Char.code c) <- true in
  List.iter mark ['\"'; '\\';  '\000'; '\b'; '\n'; '\r'; '\t'; '\026']

(* [escape.(code)] is the char written after the backslash for the char
   of that code.  Keep in sync with [unescape]. *)
let escape =
  Array.init 256
    (fun code ->
      match Char.unsafe_chr code with
      | '\000' -> '0' (* escape: \0 *)
      | '\b' -> 'b'
      | '\n' -> 'n'
      | '\r' -> 'r'
      | '\t' -> 't'
      | '\026' -> 'Z'
      | other -> other)

(* FIXME: Rework this part *)
type out_channel = {
  out_chan : out_obj_channel;
  out_separator : char;
  out_separator_bytes : Bytes.t;
  out_backslash_escape : bool;
  out_excel_tricks : bool;
}

(* Wrap an output object into a CSV output handle. *)
let to_out_obj ?(separator=',') ?(backslash_escape=false) ?(excel_tricks=false)
               out_chan =
  let out_separator_bytes = Bytes.make 1 separator in
  { out_chan;
    out_separator = separator;
    out_separator_bytes;
    out_backslash_escape = backslash_escape;
    out_excel_tricks = excel_tricks;
  }

(* CSV output handle writing to a standard output channel. *)
let to_channel ?separator ?backslash_escape ?excel_tricks fh =
  let sink =
    object
      val fh = fh
      method output s ofs len = output fh s ofs len; len
      method close_out () = close_out fh
    end in
  to_out_obj ?separator ?backslash_escape ?excel_tricks sink

(* CSV output handle appending to a buffer. *)
let to_buffer ?separator ?backslash_escape ?excel_tricks buf =
  let sink =
    object
      method output s ofs len = Buffer.add_subbytes buf s ofs len; len
      method close_out () = ()
    end in
  to_out_obj ?separator ?backslash_escape ?excel_tricks sink

(* Call the [output] method until the [len] bytes of [s] starting at
   [ofs] are all written. *)
let really_output oc s ofs len =
  let pos = ref ofs
  and remaining = ref len
  and finished = ref false in
  while not !finished do
    let written = oc.out_chan#output s !pos !remaining in
    if written < !remaining then (
      pos := !pos + written;
      remaining := !remaining - written
    )
    else finished := true
  done

(* Constant byte sequences written around and between the fields. *)
let quote_bytes = Bytes.make 1 '\"'
let output_quote oc = really_output oc quote_bytes 0 1

let equal_quote_bytes =
  let b = Bytes.make 2 '=' in
  Bytes.unsafe_set b 1 '\"';
  b
let output_equal_quote oc = really_output oc equal_quote_bytes 0 2

let newline_bytes = Bytes.make 1 '\n'
let output_newline oc = really_output oc newline_bytes 0 1

(* Determine whether the string s must be quoted and how many chars it
   must be extended to contain the escaped values.  Return -1 if there
   is no need to quote.  It is assumed that the string length [len]
   is > 0. *)
let must_quote oc s len =
  let quote = ref(is_space_or_tab(String.unsafe_get s 0)
                  || is_space_or_tab(String.unsafe_get s (len - 1))) in
  let n = ref 0 in
  for i = 0 to len - 1 do
    let c = String.unsafe_get s i in
    if oc.out_backslash_escape && must_escape.(Char.code c) then (
      (* Must be done first because backslash escaping will be
         favored, even for the separator, '\n',... *)
      quote := true;
      incr n)
    else if c = oc.out_separator || c = '\n' || c = '\r' then quote := true
    else if c = '"' || (oc.out_excel_tricks && c = '\000') then (
      quote := true;
      incr n)
  done;
  if !quote then !n else -1

(* Whether the non-empty string [s] of length [len] starts or ends with
   a space or a tab, or starts with the char '0'. *)
let need_excel_trick s len =
  let c = String.unsafe_get s 0 in
  is_space_or_tab c || c = '0' || is_space_or_tab(String.unsafe_get s (len - 1))

(* Do some work to avoid quoting a field unless it is absolutely
   required.  An empty field produces no output at all. *)
let write_escaped oc field =
  if String.length field > 0 then begin
    let len = String.length field in
    let use_excel_trick = oc.out_excel_tricks && need_excel_trick field len
    and n = must_quote oc field len in
    if n < 0 && not use_excel_trick then
      (* [really_output] does not mutate the [Bytes.t] argument. *)
      really_output oc (Bytes.unsafe_of_string field) 0 len
    else (
      let field =
        if n <= 0 then Bytes.unsafe_of_string field
        else (* There are some quotes to escape *)
          let s = Bytes.create (len + n) in
          let j = ref 0 in
          for i = 0 to len - 1 do
            let c = String.unsafe_get field i in
            if oc.out_backslash_escape && must_escape.(Char.code c) then (
              Bytes.unsafe_set s !j '\\'; incr j;
              Bytes.unsafe_set s !j escape.(Char.code c); incr j
            )
            else if c = '"' then (
              Bytes.unsafe_set s !j '"'; incr j;
              Bytes.unsafe_set s !j '"'; incr j
            )
            else if oc.out_excel_tricks && c = '\000' then (
              Bytes.unsafe_set s !j '"'; incr j;
              Bytes.unsafe_set s !j '0'; incr j
            )
            else (Bytes.unsafe_set s !j c; incr j)
          done;
          s
      in
      if use_excel_trick then output_equal_quote oc
      else output_quote oc;
      really_output oc field 0 (Bytes.length field);
      output_quote oc
    )
  end

(* [output_record oc r] writes the fields of [r] separated by the
   separator of [oc] and followed by a newline.  An empty record is
   written as an empty line. *)
let output_record oc = function
  | [] ->
      output_newline oc
  | f :: tl ->
      write_escaped oc f;
      List.iter (fun f ->
                   really_output oc oc.out_separator_bytes 0 1;
                   write_escaped oc f;
                ) tl;
      output_newline oc

(* [output_all oc t] writes all the records of [t], in order. *)
let output_all oc t =
  List.iter (fun r -> output_record oc r) t

(* [print t] writes [t] on [stdout] and flushes it. *)
let print ?separator ?backslash_escape ?excel_tricks t =
  let csv = to_channel ?separator ?backslash_escape ?excel_tricks stdout in
  output_all csv t;
  flush stdout

(* [save_out ch t] writes [t] on the channel [ch], which is left open. *)
let save_out ?separator ?backslash_escape ?excel_tricks ch t =
  let csv = to_channel ?separator ?backslash_escape ?excel_tricks ch in
  output_all csv t

(* [save fname t] writes [t] to the file [fname].  The file is closed
   even when writing raises an exception. *)
let save ?separator ?backslash_escape ?excel_tricks fname t =
  let ch = open_out fname in
  let csv = to_channel ?separator ?backslash_escape ?excel_tricks ch in
  (try output_all csv t
   with e ->
     (* Do not leak the descriptor; report the original error. *)
     close_out_noerr ch;
     raise e);
  close_out ch

(*
 * Reading rows with headers
 *)

module Rows = struct
  let header ic = Header.names ic.header

  let current ic = Row.make ic.header ic.record

  let next ic = Row.make ic.header (next ic)

  (* The convenience higher order functions are defined in terms of
     [next] in the same way as above.
*)
  (* [fold_left ~f ~init ic] folds [f] over all remaining rows. *)
  let fold_left ~f ~init:a0 ic =
    let a = ref a0 in
    try
      while true do
        a := f !a (next ic)
      done;
      assert false
    with End_of_file -> !a

  (* [iter ~f ic] applies [f] to all remaining rows. *)
  let iter ~f ic =
    try  while true do f (next ic) done;
    with End_of_file -> ()

  (* [input_all ic] is the list of all remaining rows, in order. *)
  let input_all ic =
    List.rev(fold_left ~f:(fun l r -> r :: l) ~init:[] ic)

  (* [fold_right ~f ic a0] folds [f] over all remaining rows, starting
     with the last one. *)
  let fold_right ~f ic a0 =
    (* We need to collect all records before applying [f] -- last row
       first. *)
    let lr = fold_left ~f:(fun l r -> r :: l) ~init:[] ic in
    List.fold_left (fun a r -> f r a) a0 lr

  (* [load fname] reads all the rows of the file [fname] ([stdin] when
     [fname] is ["-"]) and closes the channel, including when reading
     fails with an exception. *)
  let load ?separator ?strip ?has_header ?header
           ?backslash_escape ?excel_tricks fname =
    let fh = if fname = "-" then stdin else open_in fname in
    let csv = of_channel ?separator ?strip ?has_header ?header
                         ?backslash_escape ?excel_tricks fh in
    let t =
      try input_all csv
      with e ->
        (* Do not leak the file descriptor; an error while closing must
           not hide the original exception. *)
        (try close_in csv with _ -> ());
        raise e in
    close_in csv;
    t
end

(*
 * Acting on CSV data in memory
 *)

(* Number of rows. *)
let lines = List.length

(* Number of fields of the longest row (0 if there is no row). *)
let columns csv =
  let m = ref 0 in
  List.iter (fun row -> m := max !m (List.length row)) csv;
  !m

(* [dropwhile f l] removes the longest prefix of [l] whose elements all
   satisfy [f]. *)
let rec dropwhile f = function
  | [] -> []
  | x :: xs when f x -> dropwhile f xs
  | xs -> xs

(* Whether all the fields of the row are [""] (true for the empty row). *)
let rec empty_row = function
  | [] -> true
  | "" :: xs -> empty_row xs
  | _ :: _ -> false

let trim ?(top=true) ?(left=true) ?(right=true) ?(bottom=true) csv =
  let csv = if top then dropwhile empty_row csv else csv in
  let csv =
    if right then
      List.map (fun row ->
                  let row = List.rev row in
                  let row = dropwhile ((=) "") row in
                  let row = List.rev row in
                  row) csv
    else csv in
  let csv =
    if bottom then (
      let csv = List.rev csv in
      let csv = dropwhile empty_row csv in
      let csv = List.rev csv in
      csv
    ) else csv in

  (* Fold step: is the first cell of every row seen so far empty, and was
     at least one row with a cell seen? *)
  let and_empty_left_cell (col_empty, one_nonempty_row) = function
    | [] -> col_empty, one_nonempty_row
    | "" :: _ -> col_empty, true
    | _ -> false, true in
  let empty_left_col =
    List.fold_left and_empty_left_cell (true, false) in
  let remove_left_col =
    List.map (function [] -> [] | _ :: xs -> xs) in
  let rec loop csv =
    let left_col_empty, one_nonempty_row = empty_left_col csv in
    if left_col_empty && one_nonempty_row then
      loop(remove_left_col csv)
    else
      csv
  in

  let csv = if left then loop csv
else csv in

  csv

(* [square csv] pads every row on the right with [""] so that all rows
   are as long as the longest one. *)
let square csv =
  let width = columns csv in
  let pad_right row =
    (* Prepend the missing cells to the reversed row, then restore the
       order. *)
    let rec prepend_blanks missing cells =
      if missing = 0 then cells
      else "" :: prepend_blanks (missing - 1) cells in
    let reversed = List.rev row in
    List.rev (prepend_blanks (width - List.length row) reversed) in
  List.map pad_right csv

(* Whether all the rows have the same number of fields. *)
let is_square csv =
  let width = columns csv in
  not (List.exists (fun row -> List.length row <> width) csv)

(* [set_columns ~cols csv] makes every row exactly [cols] fields long,
   dropping extra fields or adding [""] at the end. *)
let rec set_columns ~cols = function
  | [] -> []
  | row :: others ->
     let rec fit i cells =
       if i >= cols then []
       else
         match cells with
         | [] -> "" :: fit (succ i) []
         | c :: cs -> c :: fit (succ i) cs in
     fit 0 row :: set_columns ~cols others

(* [set_rows ~rows csv] makes [csv] exactly [rows] rows long, dropping
   extra rows or adding empty rows at the end. *)
let rec set_rows ~rows csv =
  if rows <= 0 then []
  else
    match csv with
    | [] -> [] :: set_rows ~rows:(pred rows) []
    | r :: rs -> r :: set_rows ~rows:(pred rows) rs

(* [set_size ~rows ~cols csv] first adjusts the number of rows, then the
   number of columns. *)
let set_size ~rows ~cols csv =
  let with_rows = set_rows ~rows csv in
  set_columns ~cols with_rows

(* from extlib: *)
(* [drop n l] is [l] without its first [n] elements (or [[]] if [l] is
   shorter). *)
let rec drop n = function
  | _ :: rest when n > 0 -> drop (n-1) rest
  | remaining -> remaining

(* [sub ~r ~c ~rows ~cols csv] is the [rows] x [cols] rectangle of [csv]
   whose top left cell is at row [r] and column [c]. *)
let sub ~r ~c ~rows ~cols csv =
  let below = drop r csv in
  let shifted = List.map (drop c) below in
  set_columns ~cols (set_rows ~rows shifted)

(* Compare two rows for semantic equality - ignoring any blank cells
 * at the end of each row.
 *)
let rec compare_row (row1 : string list) row2 =
  match row1, row2 with
  | [], [] -> 0
  | x :: xs, y :: ys ->
     (match compare x y with
      | 0 -> compare_row xs ys
      | c -> c)
  | cell :: rest, [] ->
     if cell = "" then compare_row rest [] else 1
  | [], cell :: rest ->
     if cell = "" then compare_row [] rest else -1

(* Semantic equality for CSV files. *)
let rec compare (csv1 : t) csv2 =
  (* The first rows that differ decide; otherwise go on with the rest. *)
  let decide c rest1 rest2 =
    if c <> 0 then c else compare rest1 rest2 in
  match csv1, csv2 with
  | [], [] -> 0
  | x :: xs, y :: ys -> decide (compare_row x y) xs ys
  | x :: xs, [] -> decide (compare_row x []) xs []
  | [], y :: ys -> decide (compare_row [] y) [] ys

(* Concatenate - arrange left to right. *)
let rec concat = function
  | [] -> []
  | [csv] -> csv
  | left_csv :: csvs ->
     (* Concatenate the remaining CSV files.
*)
     let right_csv = concat csvs in

     (* Set the height of the left and right CSVs to the same. *)
     let nr_rows = max (lines left_csv) (lines right_csv) in
     let left_csv = set_rows ~rows:nr_rows left_csv in
     let right_csv = set_rows ~rows:nr_rows right_csv in

     (* Square off the left CSV. *)
     let left_csv = square left_csv in

     (* Prepend the right CSV rows with the left CSV rows. *)
     List.map (
       fun (left_row, right_row) -> List.append left_row right_row
     ) (List.combine left_csv right_csv)

(* [transpose csv] exchanges the rows and the columns of [csv]. *)
let transpose =
  (* Suppose the CSV data is presented with the last row first.  Then
     new rows may be constructed in a tail rec way.  We use mutable
     rows in order to preserve tail recursiveness. *)
  (* Return the new 1st row; whether all rows are empty. *)
  let rec row_of_1st_col tr_row empty = function
    | [] -> (tr_row, empty)     (* No more rows *)
    | r :: rows ->
       match !r with
       | [] ->                  (* Last row empty *)
          (* A missing cell only becomes [""] when a cell of a later row
             was already collected: trailing blanks are not added. *)
          let tr_row = if tr_row = [] then tr_row else "" :: tr_row in
          row_of_1st_col tr_row empty rows
       | a :: tl ->
          r := tl;
          let tr_row = if a = "" && tr_row = [] then [] else a :: tr_row in
          row_of_1st_col tr_row false rows  in
  (* [tr tr_csv csv] removes the first column of the (mutable) rows [csv]
     until they are all empty; [tr_csv] holds the new rows built so far,
     last one first. *)
  let rec tr tr_csv csv =
    let row, empty = row_of_1st_col [] true csv in (* remove [csv] 1st col *)
    if empty then List.rev tr_csv
    else tr (row :: tr_csv) csv in
  fun csv -> tr [] (List.rev_map ref csv)

(* [to_array csv] converts the rows and the list of rows to arrays. *)
let to_array csv =
  Array.of_list (List.map Array.of_list csv)

(* [of_array csv] is the inverse conversion of [to_array]. *)
let of_array csv =
  List.map Array.to_list (Array.to_list csv)

(* [associate header data] turns every row of [data] into a list of
   (name, field) pairs.  The row is first cut or padded with [""] to the
   length of [header]. *)
let associate header data =
  let nr_cols = List.length header in
  let rec trunc = function
    | 0, _ -> []
    | n, [] -> "" :: trunc (n-1, [])
    | n, (x :: xs) -> x :: trunc (n-1, xs)
  in
  List.map (
    fun row ->
      let row = trunc (nr_cols, row) in
      List.combine header row
  ) data

(* [map ~f csv] applies [f] to every field. *)
let map ~f csv =
  List.map (fun row -> List.map (fun el -> f el) row) csv


(* [save_out_readable chan csv] writes [csv] on [chan] with the cells of
   each column padded with spaces to a common width. *)
let save_out_readable chan csv =
  (* Escape all the strings in the CSV file first. *)
  (* XXX Why are we doing this?  I commented it out anyway.
  let csv = List.map (List.map String.escaped) csv in
  *)

  (* Find the width of each column. *)
  let widths =
    (* Don't consider rows with only a single element - typically
     * long titles.
     *)
    let csv = List.filter (function [_] -> false | _ -> true) csv in

    (* Square the CSV file - makes the next step simpler to implement. *)
    let csv = square csv in

    match csv with
      | [] -> []
      | row1 :: rest ->
          let lengths_row1 = List.map String.length row1 in
          let lengths_rest = List.map (List.map String.length) rest in
          (* Element-wise maximum of two lists of lengths. *)
          let max2rows r1 r2 =
            let rp =
              try List.combine r1 r2
              with
                Invalid_argument _ ->
                  failwith (Printf.sprintf "Csv.save_out_readable: internal \ error: length r1 = %d, length r2 = %d" (List.length r1) (List.length r2)) in
            List.map (fun ((a : int), (b : int)) -> max a b) rp
          in
          List.fold_left max2rows lengths_row1 lengths_rest in

  (* Print out each cell at the correct width. *)
  (* [repeat f n] calls [f ()] [n] times. *)
  let rec repeat f = function
    | 0 -> ()
    | i -> f (); repeat f (i-1)
  in
  List.iter (
    function
    | [cell] ->                         (* Single column. *)
        output_string chan cell;
        output_char chan '\n'
    | row ->                            (* Other. *)
        (* Pair up each cell with its max width.
*) let row = let rec loop = function | ([], _) -> [] | (_, []) -> failwith "Csv.save_out_readable: internal error" | (cell :: cells, width :: widths) -> (cell, width) :: loop (cells, widths) in loop (row, widths) in List.iter ( fun (cell, width) -> output_string chan cell; let n = String.length cell in repeat (fun () -> output_char chan ' ') (width - n + 1) ) row; output_char chan '\n' ) csv let print_readable = save_out_readable stdout ocaml-csv-1.4.2/src/csv.mli000066400000000000000000000451251261026267100155010ustar00rootroot00000000000000(* File: csv.mli Copyright (C) 2006 Richard Jones email: rjones@redhat.com Christophe Troestler email: Christophe.Troestler@umons.ac.be WWW: http://math.umons.ac.be/anum/software/ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 or later as published by the Free Software Foundation, with the special exception on linking described in the file LICENSE. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file LICENSE for more details. *) (** Read and write the CSV (comma separated values) format. This library should be compatible with {{:https://tools.ietf.org/html/rfc4180} RFC4180} if one sets [strip=false] in the creation functions. @author Richard Jones @author Christophe Troestler *) type t = string list list (** Representation of CSV data in memory. This is a list of rows (also called records), each row being a list of columns. *) (** {2 Input/output objects} *) (** The most basic input object for best interoperability. *) class type in_obj_channel = object method input : Bytes.t -> int -> int -> int (** [input buf ofs len] reads up to [len] octets from the channel and puts them in the substring [buf.[ofs .. ofs+len-1]]. Returns the number of octets actually read (and stored). 
When the channel is non-blocking, and there are currently no bytes to read, the number 0 will be returned. @raise End_of_file when there are no more bytes to read. *) method close_in : unit -> unit (** Closes the channel for input. *) end (** The most basic output object for best interoperability. *) class type out_obj_channel = object method output : Bytes.t -> int -> int -> int (** [output s ofs len] writes up to [len] bytes of the substring [s.[ofs .. ofs+len-1]]. Return the number of bytes actually written. When the channel is non-blocking, and there are currently no bytes to write, the number 0 must be returned. *) method close_out : unit -> unit (** Flushes the buffer, if any, and closes the channel for output. *) end (** {2 Input} *) exception Failure of int * int * string (** [Failure(nrecord, nfield, msg)] is raised to indicate a parsing error for the field number [nfield] on the record number [nrecord], the description [msg] says what is wrong. The first record and the first field of a record are numbered [1] (to correspond to the usual spreadsheet numbering but differing from [List.nth] of the OCaml representation). *) type in_channel (** Stateful handle to input CSV files. *) val of_in_obj : ?separator:char -> ?strip: bool -> ?has_header: bool -> ?header: string list -> ?backslash_escape: bool -> ?excel_tricks:bool -> in_obj_channel -> in_channel (** [of_in_obj ?separator ?excel_tricks in_chan] creates a new "channel" to access the data in CSV form available from the channel [in_chan]. @param separator What character the separator is. The default is [',']. You should be aware however that, in the countries where comma is used as a decimal separator, Excel will use [';'] as the separator. @param strip Whether to remove the white space around unquoted fields. The default is [true] for backward compatibility reasons. @param has_header tells that the first row of the CSV channel is to be interpreted as a header (this row will not be returned by {!next}). 
This is useful to use the functions in the {!Rows} module below. Default: [false]. @param header Supply the header to use for this CSV channel. If both [header] and [has_header] are given, the names of [header] take precedence; if a name in [header] is [""], the one in the CSV header is used. If a name appears twice, only its first occurrence is used. @param backslash_escape Whether to allow \", \n,... in quoted fields. This is used by MySQL for example but is not standard CSV so it is set to [false] by default. @param excel_tricks enables Excel tricks, namely the fact that '"' followed by '0' in a quoted string means ASCII NULL and the fact that a field of the form ="..." only returns the string inside the quotes. Default: [true]. *) val of_channel : ?separator:char -> ?strip: bool -> ?has_header: bool -> ?header: string list -> ?backslash_escape: bool -> ?excel_tricks:bool -> Pervasives.in_channel -> in_channel (** Same as {!Csv.of_in_obj} except that the data is read from a standard channel. *) val of_string : ?separator:char -> ?strip: bool -> ?has_header: bool -> ?header: string list -> ?backslash_escape: bool -> ?excel_tricks:bool -> string -> in_channel (** Same as {!Csv.of_in_obj} except that the data is read from a string. *) val load : ?separator:char -> ?strip: bool -> ?backslash_escape: bool -> ?excel_tricks:bool-> string -> t (** [load fname] loads the CSV file [fname]. If [filename] is ["-"] then load from [stdin]. @param separator What character the separator is. The default is [',']. You should be aware however that, in the countries where comma is used as a decimal separator, Excel will use [';'] as the separator. @param backslash_escape Whether to allow \", \n,... in quoted fields. This is used by MySQL for example but is not standard CSV so it is set to [false] by default. @param excel_tricks enables Excel tricks, namely the fact that '"' followed by '0' in a quoted string means ASCII NULL and the fact that a field of the form ="..." 
only returns the string inside the quotes. Default: [true]. *) val load_in : ?separator:char -> ?strip: bool -> ?backslash_escape: bool -> ?excel_tricks:bool -> Pervasives.in_channel -> t (** [load_in ch] loads a CSV file from the input channel [ch]. See {!Csv.load} for the meaning of [separator] and [excel_tricks]. *) val to_in_obj : in_channel -> in_obj_channel (** For efficiency reasons, the [in_channel] buffers the data from the original channel. If you want to examine the data by other means than the methods below (say after a failure), you need to use this function in order not to "loose" data in the buffer. *) val close_in : in_channel -> unit (** [close_in ic] closes the channel [ic]. The underlying channel is closed as well. *) val next : in_channel -> string list (** [next ic] returns the next record in the CSV file. @raise End_of_file if no more record can be read. @raise Csv.Failure if the CSV format is not respected. The partial record read is available with [#current_record]. *) val fold_left : f:('a -> string list -> 'a) -> init:'a -> in_channel -> 'a (** [fold_left f a ic] computes (f ... (f (f a r0) r1) ... rN) where r1,...,rN are the records in the CSV file. If [f] raises an exception, the record available at that moment is accessible through {!Csv.current_record}. *) val fold_right : f:(string list -> 'a -> 'a) -> in_channel -> 'a -> 'a (** [fold_right f ic a] computes (f r1 ... (f rN-1 (f rN a)) ...) where r1,...,rN-1, rN are the records in the CSV file. All records are read before applying [f] so this method is not convenient if your file is large. *) val iter : f:(string list -> unit) -> in_channel -> unit (** [iter f ic] iterates [f] on all remaining records. If [f] raises an exception, the record available at that moment is accessible through {!Csv.current_record}. *) val input_all : in_channel -> t (** [input_all ic] return a list of the CSV records till the end of the file. 
*) val current_record : in_channel -> string list (** The current record under examination. This is useful in order to gather the parsed data in case of [Failure]. *) val load_rows : ?separator:char -> ?strip: bool -> ?backslash_escape: bool -> ?excel_tricks:bool -> (string list -> unit) -> Pervasives.in_channel -> unit (** @deprecated use {!Csv.iter} on a {!Csv.in_channel} created with {!Csv.of_channel}. *) (************************************************************************) (** {2 Output} *) type out_channel val to_out_obj : ?separator:char -> ?backslash_escape: bool -> ?excel_tricks:bool -> out_obj_channel -> out_channel (** [to_out_obj ?separator ?excel_tricks out_chan] creates a new "channel" to output the data in CSV form. @param separator What character the separator is. The default is [',']. @param backslash_escape Prefer to escape the separator in a quoted string with a backslash (e.g. "\"") instead of doubling it. Also backslash-escape '\n', '\r', '\t', '\b', '\026' (as '\Z') and '\000' (as '\0') This is nice for interoperability but is nonstandard CSV to it is set to [false] by default. @param excel_tricks enables Excel tricks, namely the fact that '\000' is represented as '"' followed by '0' and the fact that a field with leading or trailing spaces or a leading '0' will be encoded as ="..." (to avoid Excel "helping" you). Default: [false]. *) val to_channel : ?separator:char -> ?backslash_escape: bool -> ?excel_tricks:bool -> Pervasives.out_channel -> out_channel (** Same as {!Csv.to_out_obj} but output to a standard channel. *) val to_buffer : ?separator:char -> ?backslash_escape: bool -> ?excel_tricks:bool -> Buffer.t -> out_channel (** Same as {!Csv.to_out_obj} but output to a buffer. *) val output_record : out_channel -> string list -> unit (** [output_record oc r] write the record [r] is CSV form to the channel [oc]. *) val output_all : out_channel -> t -> unit (** [output_all oc csv] outputs all records in [csv] to the channel [oc]. 
*) val save_out : ?separator:char -> ?backslash_escape: bool -> ?excel_tricks:bool -> Pervasives.out_channel -> t -> unit (** @deprecated Save string list list to a channel. *) val save : ?separator:char -> ?backslash_escape: bool -> ?excel_tricks:bool -> string -> t -> unit (** [save fname csv] saves the [csv] data to the file [fname]. *) val print : ?separator:char -> ?backslash_escape: bool -> ?excel_tricks:bool -> t -> unit (** Print the CSV data. *) val print_readable : t -> unit (** Print the CSV data to [stdout] in a human-readable format. Not much is guaranteed about how the CSV is printed, except that it will be easier to follow than a "raw" output done with {!Csv.print}. This is a one-way operation. There is no easy way to parse the output of this command back into CSV data. *) val save_out_readable : Pervasives.out_channel -> t -> unit (** As for {!Csv.print_readable}, allowing the output to be sent to a channel. *) (************************************************************************) (** {2 Functions to access rows when a header is present} *) (** A row with a header. *) module Row : sig type t (** Representation of a row whose columns are accessible both by indices and by headers names. *) val get : t -> int -> string (** [get row i] returns the [i]th column of the row. The first column has index [0]. Since CSV allows a file to have rows of different lengths, this function never fails, it returns [""] if the column does not exist. *) val find : t -> string -> string (** [find row header] return the value of the colum labelled with [header] (or [""] if not such header has been declared). *) val to_list : t -> string list (** [to_list row] convert [row] to the usual representation, the list being in the column order. *) val to_assoc : t -> (string * string) list (** [to_assoc row] return an associative list of the row data as [(header, value)]. If no header is given for a column, [""] is used. 
*) val with_header : t -> string list -> t (** [with_header row h] return the [row] with headers [h]. If a name in [h] is [""], the name present in [row] is used. If a name is duplicated. *) end (** Accessing rows (when a header was provided). *) module Rows : sig val header : in_channel -> string list (** The header declared for this channel. *) val next : in_channel -> Row.t (** See {!Csv.next}. If no header was declared for the channel, this function will work but only access using {!Row.get} will work. *) val fold_left : f:('a -> Row.t -> 'a) -> init:'a -> in_channel -> 'a (** See {!Csv.fold_left}. *) val fold_right : f:(Row.t -> 'a -> 'a) -> in_channel -> 'a -> 'a (** See {!Csv.fold_right}. *) val iter : f:(Row.t -> unit) -> in_channel -> unit (** See {!Csv.iter}. *) val input_all : in_channel -> Row.t list (** See {!Csv.input_all}. *) val load : ?separator:char -> ?strip: bool -> ?has_header: bool -> ?header: string list -> ?backslash_escape: bool -> ?excel_tricks:bool -> string -> Row.t list (** See {!Csv.load}. *) val current : in_channel -> Row.t (** See {!Csv.current_record}. *) end (************************************************************************) (** {2 Functions acting on CSV data loaded in memory} *) val lines : t -> int (** Return the number of lines in a CSV data. *) val columns : t -> int (** Work out the (maximum) number of columns in a CSV file. Note that each line may be a different length, so this finds the one with the most columns. *) val trim : ?top:bool -> ?left:bool -> ?right:bool -> ?bottom:bool -> t -> t (** This takes a CSV file and trims empty cells. All four of the option arguments ([~top], [~left], [~right], [~bottom]) default to [true]. The exact behaviour is: [~right]: If true, remove any empty cells at the right hand end of any row. The number of columns in the resulting CSV structure will not necessarily be the same for each row. 
[~top]: If true, remove any empty rows (no cells, or containing just empty cells) from the top of the CSV structure. [~bottom]: If true, remove any empty rows from the bottom of the CSV structure. [~left]: If true, remove any empty columns from the left of the CSV structure. Note that [~left] and [~right] are quite different: [~left] considers the whole CSV structure, whereas [~right] considers each row in isolation. *) val square : t -> t (** Make the CSV data "square" (actually rectangular). This pads out each row with empty cells so that all rows are the same length as the longest row. After this operation, every row will have length {!Csv.columns}. *) val is_square : t -> bool (** Return true iff the CSV is "square" (actually rectangular). This means that each row has the same number of cells. *) val set_columns : cols:int -> t -> t (** [set_columns cols csv] makes the CSV data square by forcing the width to the given number of [cols]. Any short rows are padded with blank cells. Any long rows are truncated. *) val set_rows : rows:int -> t -> t (** [set_rows rows csv] makes the CSV data have exactly [rows] rows by adding empty rows or truncating rows as necessary. Note that [set_rows] does not make the CSV square. If you want it to be square, call either {!Csv.square} or {!Csv.set_columns} after. *) val set_size : rows:int -> cols:int -> t -> t (** [set_size rows cols csv] makes the CSV data square by forcing the size to [rows * cols], adding blank cells or truncating as necessary. It is the same as calling [set_columns cols (set_rows rows csv)] *) val sub : r:int -> c:int -> rows:int -> cols:int -> t -> t (** [sub r c rows cols csv] returns a subset of [csv]. The subset is defined as having top left corner at row [r], column [c] (counting from [0]) and being [rows] deep and [cols] wide. The returned CSV will be "square". 
*) val compare : t -> t -> int (** Compare two CSV files for equality, ignoring blank cells at the end of a row, and empty rows appended to one or the other. This is "semantic" equality - roughly speaking, the two CSV files would look the same if opened in a spreadsheet program. *) val concat : t list -> t (** Concatenate CSV files so that they appear side by side, arranged left to right across the page. Each CSV file (except the final one) is first squared. (To concatenate CSV files so that they appear from top to bottom, just use [List.concat]). *) val transpose : t -> t (** Permutes the lines and columns of the CSV data. Nonexistent cells become empty cells after transpose if they must be created. *) val to_array : t -> string array array val of_array : string array array -> t (** Convenience functions to convert to and from a matrix representation. [to_array] will produce a ragged matrix (not all rows will have the same length) unless you call {!Csv.square} first. *) val associate : string list -> t -> (string * string) list list (** [associate header data] takes a block of data and converts each row in turn into an assoc list which maps column header to data cell. Typically a spreadsheet will have the format: {v header1 header2 header3 data11 data12 data13 data21 data22 data23 ... v} This function arranges the data into a more usable form which is robust against changes in column ordering. The output of the function is: {v [ ["header1", "data11"; "header2", "data12"; "header3", "data13"]; ["header1", "data21"; "header2", "data22"; "header3", "data23"]; etc. ] v} Each row is turned into an assoc list (see [List.assoc]). If a row is too short, it is padded with empty cells ([""]). If a row is too long, it is truncated. 
You would typically call this function as: {[ let header, data = match csv with h :: d -> h, d | [] -> assert false;; let data = Csv.associate header data;; ]} The header strings are shared, so the actual space in memory consumed by the spreadsheet is not much larger. *) val map : f:(string -> string) -> t -> t (** [map f csv] applies [f] to all entries of [csv] and returns the resulting CSV. *) ocaml-csv-1.4.2/tests/000077500000000000000000000000001261026267100145475ustar00rootroot00000000000000ocaml-csv-1.4.2/tests/test.ml000066400000000000000000000056221261026267100160650ustar00rootroot00000000000000open Printf let do_testcsv ?separator ?strip ?backslash_escape filename expected = try let csv = Csv.load ?separator ?strip ?backslash_escape filename in if csv <> expected then ( printf "input file: %s\n" filename; printf "Csv library produced:\n"; Csv.print csv; printf "Expected:\n"; Csv.print expected; failwith "failed" ) with Csv.Failure(nrow, nfield, err) -> printf "The file %S line %i, field %i, does not conform to the CSV \ specifications: %s\n" filename nrow nfield err; failwith "failed" let () = do_testcsv "testcsv1.csv" [ [ "This is a test\nwith commas,,,,,\n\nand carriage returns." 
] ] let () = do_testcsv "testcsv2.csv" [ [ "Normal field"; "Quoted field"; "Quoted field with \"\" quotes" ] ] let () = do_testcsv "testcsv3.csv" [ [ "" ]; [ ""; "" ]; [ ""; ""; "" ]; [ ""; ""; ""; "" ]; [ ""; ""; ""; ""; "" ] ] let () = do_testcsv "testcsv4.csv" [] let () = do_testcsv "testcsv5.csv" [ [ "This is a test\nwith commas,,,,,\n\nand carriage returns."; "a second field"; "a third field" ]; [ "a fourth field on a new line" ] ] let () = do_testcsv "testcsv6.csv" [ [ "This is a test\nwith commas,,,,,\n\nand carriage returns\nand \000"; "a second field"; "a third field" ]; [ "a fourth field on a new line" ] ] let () = do_testcsv "testcsv7.csv" [ [ "Initial"; "and"; "final"; ""; "spaces"; "do not matter" ]; [ " Quoted spaces "; "are"; " important " ] ] let () = do_testcsv "testcsv7.csv" ~strip:false [ [ " Initial "; " and "; " final"; " "; "\tspaces "; " do not matter " ]; [ " Quoted spaces "; " are"; " important " ] ] let () = do_testcsv ~separator:'\t' "testcsv8.csv" [["Foo"; "Bar"]; ["Baz"; "Boof"]; ["a"; ""; "c"]] let () = do_testcsv "testcsv10.csv" ~backslash_escape:true [["a"; "b\"c"; "d\\d\000"]] let () = let csv1 = [ [ "a"; "b"; "c"; ""; "" ]; [ "f"; "g"; "h"; "i"; "" ]; [ "" ]; [ ] ] in let csv2 = Csv.trim ~top:false ~left:false ~right:true ~bottom:true csv1 in assert(compare csv1 csv2 <> 0); assert(Csv.compare csv1 csv2 = 0) let () = let csv1 = [ [ "a"; "b"; "c"; ""; "" ]; [ "f"; "g"; "h"; "i"; "" ]; [ "" ]; [ ] ] in let csv2 = [ [ "a"; "b"; "c"; "d"; "" ]; [ "f"; "g"; "h"; "i"; "" ]; [ "" ]; [ ] ] in assert (Csv.compare csv1 csv2 < 0) let () = let csv1 = [ [ "a"; "b"; "c"; ""; "" ]; [ "f"; "g"; "h"; "i"; "" ]; [ "" ]; [ ] ] in let csv2 = [ [ "A"; "B"; "C"; ""; "" ]; [ "F"; "G"; "H"; "I"; "" ]; [ "" ]; [ ] ] in assert (Csv.map ~f:String.capitalize csv1 = csv2) let () = print_endline "All conformity tests succeeded." 
ocaml-csv-1.4.2/tests/test_header.ml000066400000000000000000000022461261026267100173740ustar00rootroot00000000000000open Printf let print = let print_assoc row = List.iter (fun (k,v) -> printf " (%s, %s)" k v) row; printf "\n" in fun csv -> List.iter print_assoc csv let testcsv ?has_header ?header filename expected = try let csv = Csv.Rows.load ?has_header ?header filename in let csv = List.map Csv.Row.to_assoc csv in if csv <> expected then ( printf "input file: %s\n" filename; printf "Csv library produced:\n"; print csv; printf "Expected:\n"; print expected; failwith "failed" ) with Csv.Failure(nrow, nfield, err) -> printf "The file %S line %i, field %i, does not conform to the CSV \ specifications: %s\n" filename nrow nfield err; failwith "failed" let () = testcsv "tests/testcsv11.csv" ~has_header:true [["h1", "a"; "h2", "b"; "", "c"; "h4", "d"]] let () = testcsv "tests/testcsv11.csv" ~has_header:true ~header:["q1"; ""; "q3"; "q4"] [["q1", "a"; "h2", "b"; "q3", "c"; "q4", "d"]] let () = testcsv "tests/testcsv2.csv" ~header:["h1"; ""; "h1"] (* duplicate header *) [ [ "h1", "Normal field"; "", "Quoted field"; "", "Quoted field with \"\" quotes" ] ] ocaml-csv-1.4.2/tests/test_write.ml000066400000000000000000000006511261026267100172740ustar00rootroot00000000000000open Printf let roundtrip ?excel_tricks csv = let buf = Buffer.create 128 in Csv.output_all (Csv.to_buffer buf ?excel_tricks) csv; let csv' = Csv.input_all (Csv.of_string (Buffer.contents buf)) in if Csv.compare csv csv' <> 0 then ( printf "Csv roundtrip:\n"; Csv.print csv'; printf "Expected:\n"; Csv.print csv; failwith "failed!" 
) let () = roundtrip [ [ "01234567" ] ] ~excel_tricks:true ocaml-csv-1.4.2/tests/testcsv1.csv000066400000000000000000000000701261026267100170350ustar00rootroot00000000000000"This is a test with commas,,,,, and carriage returns."ocaml-csv-1.4.2/tests/testcsv10.csv000066400000000000000000000000231261026267100171130ustar00rootroot00000000000000a,"b\"c", "d\\d\0" ocaml-csv-1.4.2/tests/testcsv11.csv000066400000000000000000000000221261026267100171130ustar00rootroot00000000000000h1,h2,,h4 a,b,c,d ocaml-csv-1.4.2/tests/testcsv2.csv000066400000000000000000000000741261026267100170420ustar00rootroot00000000000000Normal field,"Quoted field","Quoted field with """" quotes" ocaml-csv-1.4.2/tests/testcsv3.csv000066400000000000000000000000161261026267100170370ustar00rootroot00000000000000 , ,, ,,, ,,,,ocaml-csv-1.4.2/tests/testcsv4.csv000066400000000000000000000000001261026267100170310ustar00rootroot00000000000000ocaml-csv-1.4.2/tests/testcsv5.csv000066400000000000000000000001621261026267100170430ustar00rootroot00000000000000"This is a test with commas,,,,, and carriage returns.",a second field,a third field a fourth field on a new lineocaml-csv-1.4.2/tests/testcsv6.csv000066400000000000000000000001701261026267100170430ustar00rootroot00000000000000"This is a test with commas,,,,, and carriage returns and "0",a second field,a third field a fourth field on a new lineocaml-csv-1.4.2/tests/testcsv7.csv000066400000000000000000000001321261026267100170420ustar00rootroot00000000000000 Initial , and , final, , spaces , do not matter " Quoted spaces ", are, " important " ocaml-csv-1.4.2/tests/testcsv8.csv000066400000000000000000000000361261026267100170460ustar00rootroot00000000000000"Foo" "Bar" "Baz" "Boof" a c ocaml-csv-1.4.2/tests/testcsv9.csv000066400000000000000000000000221261026267100170420ustar00rootroot00000000000000A1,A2,A3 B1,B2 C1