diff -ur crossref.ori/CrossrefExportDeployment.inc.php crossref/CrossrefExportDeployment.inc.php
--- crossref.ori/CrossrefExportDeployment.inc.php 2019-12-04 19:09:49.488753925 +0200
+++ crossref/CrossrefExportDeployment.inc.php 2019-12-26 14:53:21.958517441 +0200
@@ -24,6 +24,8 @@
define('CROSSREF_XSI_SCHEMALOCATION' , 'https://www.crossref.org/schemas/crossref4.3.6.xsd');
define('CROSSREF_XMLNS_JATS' , 'http://www.ncbi.nlm.nih.gov/JATS1');
define('CROSSREF_XMLNS_AI' , 'http://www.crossref.org/AccessIndicators.xsd');
+define('CROSSREF_XMLNS_MML' , 'http://www.w3.org/1998/Math/MathML');
+
class CrossrefExportDeployment {
/** @var Context The current import/export context */
@@ -109,6 +111,14 @@
}
/**
+ * Get the MathML namespace URI (the CROSSREF_XMLNS_MML constant)
+ * @return string
+ */
+ function getMMLNamespace() {
+ return CROSSREF_XMLNS_MML;
+ }
+
+ /**
* Get the schema filename.
* @return string
*/
diff -ur crossref.ori/filter/ArticleCrossrefXmlFilter.inc.php crossref/filter/ArticleCrossrefXmlFilter.inc.php
--- crossref.ori/filter/ArticleCrossrefXmlFilter.inc.php 2019-12-04 19:09:49.480754148 +0200
+++ crossref/filter/ArticleCrossrefXmlFilter.inc.php 2019-12-26 19:11:13.016071242 +0200
@@ -92,7 +92,14 @@
// title
$titlesNode = $doc->createElementNS($deployment->getNamespace(), 'titles');
- $titlesNode->appendChild($node = $doc->createElementNS($deployment->getNamespace(), 'title', htmlspecialchars($submission->getTitle($submission->getLocale()), ENT_COMPAT, 'UTF-8')));
+ // We construct a string to be converted into a single XML node
+ // First collect data
+ $titleNode = $doc->createDocumentFragment();
+ $titleString = $this->latexToMML($submission->getTitle($submission->getLocale()));
+ $titleNodeString = '
' . $titleString . '';
+ // Insert as XML
+ $titleNode->appendXML($titleNodeString);
+ $titlesNode->appendChild($titleNode);
$journalArticleNode->appendChild($titlesNode);
// contributors
@@ -124,7 +131,12 @@
// abstract
if ($submission->getAbstract($submission->getLocale())) {
$abstractNode = $doc->createElementNS($deployment->getJATSNamespace(), 'jats:abstract');
- $abstractNode->appendChild($node = $doc->createElementNS($deployment->getJATSNamespace(), 'jats:p', htmlspecialchars(html_entity_decode(strip_tags($submission->getAbstract($submission->getLocale())), ENT_COMPAT, 'UTF-8'), ENT_COMPAT, 'UTF-8')));
+ //$abstractNode->appendChild($node = $doc->createElementNS($deployment->getJATSNamespace(), 'jats:p', htmlspecialchars(html_entity_decode(strip_tags($submission->getAbstract($submission->getLocale())), ENT_COMPAT, 'UTF-8'), ENT_COMPAT, 'UTF-8')));
+ $abstractJATSNode = $doc->createDocumentFragment();
+ $abstractString = $this->latexToMML($submission->getAbstract($submission->getLocale()));
+ $abstractNodeString = '' . $abstractString . '';
+ $abstractJATSNode->appendXML($abstractNodeString);
+ $abstractNode->appendChild($abstractJATSNode);
$journalArticleNode->appendChild($abstractNode);
}
@@ -324,7 +336,73 @@
}
return $componentListNode;
}
+
+ /**
+ * Replace LaTeX math in a string with MathML and escape the remaining text
+ * @param $text string Text that may contain LaTeX math
+ * @return string Escaped text with each formula replaced by latexFormulaToMML() output
+ */
+ function latexToMML($text){
+ // Math separators are $$ $$, \[ \], \( \), $ $, and the math environments
+ // displaymath, equation, eqnarray, align, gather (starred forms too, except displaymath)
+ $mathSeparators=[
+ '\$\$.+?\$\$',
+ '\\\\\[.+?\\\\\]',
+ '\\\\\(.+?\\\\\)' ,
+ '\$.+?\$',
+ '\\\\begin{displaymath}.+?\\\\end{displaymath}',
+ '\\\\begin{equation\*?}.+?\\\\end{equation\*?}',
+ '\\\\begin{eqnarray\*?}.+?\\\\end{eqnarray\*?}',
+ '\\\\begin{align\*?}.+?\\\\end{align\*?}',
+ '\\\\begin{gather\*?}.+?\\\\end{gather\*?}',
+ ];
+ $mathPattern = '/' . join('|', $mathSeparators) . '/s';
+ // Reset the formula queue; both closures below reach it through $this
+ $this->math = [];
+ // Replace each formula with a [[MATH]] placeholder and queue the formula (delimiters included)
+ $noMathText = preg_replace_callback($mathPattern,
+ function ($matches){
+ $formula = array_pop($matches);
+ array_push($this->math, $formula);
+ return '[[MATH]]';
+ },
+ $text);
+ // Strip tags and re-escape the non-math text, then convert the formulas and insert them back
+ $cleanNoMathText = htmlspecialchars(html_entity_decode(strip_tags($noMathText, ''), ENT_COMPAT, 'UTF-8'), ENT_COMPAT, 'UTF-8');
+ // NOTE(review): a literal [[MATH]] already present in the input is also treated as a placeholder
+ $restoredText = preg_replace_callback('/\[\[MATH\]\]/',
+ function($matches){
+ $formula = array_shift($this->math);
+ return $this->latexFormulaToMML($formula);
+ },
+ $cleanNoMathText);
+ return $restoredText;
+
+ }
+ /**
+ * Run an external program to convert a single LaTeX formula to MathML.
+ * Uses the command-line MathToWeb (mathtoweb.com) jar through the hard-coded java and jar paths below.
+ * @param $formula string LaTeX formula including its math delimiters
+ * @return string|false Contents of the converter output file; false if that file cannot be read
+ */
+ function latexFormulaToMML($formula){
+ $java = "/usr/bin/java";
+ $jar = "/var/www/mathtoweb/mathtoweb.jar";
+ // MathToWeb complains about $$..$$, so rewrite that form as \[..\]
+ $formula = preg_replace('/^\$\$(.+)\$\$$/s', "\[$1\]", $formula);
+ // Write the formula to a temp file; the result is then read from that path plus '_MathToWeb_000'
+ $infile=tempnam('/tmp','math');
+ $handle=fopen($infile,"w");
+ fwrite($handle, $formula);
+ fclose($handle);
+ $outfile = $infile . '_MathToWeb_000';
+ system($java . " -jar " . $jar . " -ns mml -rep -unicode -force ". $infile . ">/dev/null" );
+ $mml = file_get_contents($outfile);
+ unlink($infile);
+ unlink($outfile);
+ return $mml;
+ }
}
diff -ur crossref.ori/filter/IssueCrossrefXmlFilter.inc.php crossref/filter/IssueCrossrefXmlFilter.inc.php
--- crossref.ori/filter/IssueCrossrefXmlFilter.inc.php 2019-12-04 19:09:49.480754148 +0200
+++ crossref/filter/IssueCrossrefXmlFilter.inc.php 2019-12-26 14:52:58.824796197 +0200
@@ -84,6 +84,7 @@
$rootNode->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:xsi', $deployment->getXmlSchemaInstance());
$rootNode->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:jats', $deployment->getJATSNamespace());
$rootNode->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:ai', $deployment->getAINamespace());
+ $rootNode->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:mml', $deployment->getMMLNamespace());
$rootNode->setAttribute('version', $deployment->getXmlSchemaVersion());
$rootNode->setAttribute('xsi:schemaLocation', $deployment->getNamespace() . ' ' . $deployment->getSchemaFilename());
return $rootNode;