{
"translatorID": "951c027d-74ac-47d4-a107-9c3069ab7b48",
"label": "Embedded Metadata",
"creator": "Simon Kornblith and Avram Lyon",
"target": "",
"minVersion": "3.0.4",
"maxVersion": "",
"priority": 320,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2021-03-07 18:58:39"
}
/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2011 Avram Lyon and the Center for History and New Media
George Mason University, Fairfax, Virginia, USA
http://zotero.org
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
// attr()/text() v2
/* eslint-disable */
/**
 * Read an attribute from the element matched by a CSS selector.
 *
 * @param {Document|Element} docOrElem - root to search in
 * @param {String} selector - CSS selector
 * @param {String} attr - name of the attribute to read
 * @param {Number} [index] - when truthy, use the index-th match; when omitted
 *   or 0, use the first match
 * @return {String|null} the attribute value, or null when nothing matches
 */
function attr(docOrElem, selector, attr, index) {
	var match;
	if (index) {
		match = docOrElem.querySelectorAll(selector).item(index);
	}
	else {
		match = docOrElem.querySelector(selector);
	}
	if (!match) return null;
	return match.getAttribute(attr);
}

/**
 * Read the text content of the element matched by a CSS selector.
 *
 * @param {Document|Element} docOrElem - root to search in
 * @param {String} selector - CSS selector
 * @param {Number} [index] - when truthy, use the index-th match; when omitted
 *   or 0, use the first match
 * @return {String|null} the element's textContent, or null when nothing matches
 */
function text(docOrElem, selector, index) {
	var match;
	if (index) {
		match = docOrElem.querySelectorAll(selector).item(index);
	}
	else {
		match = docOrElem.querySelector(selector);
	}
	if (!match) return null;
	return match.textContent;
}
/* eslint-enable */
/* eslint-disable camelcase */
// Lookup table from `citation_*` meta tag names (keys) to item field names
// (values). Several keys intentionally share a target field ("date",
// "publicationTitle", "conferenceName").
// NOTE(review): the consumer is outside this view; if it walks the table in
// key order and overwrites (as processFields() does), the LAST key with a
// value wins for a shared field — keep the order stable.
var HIGHWIRE_MAPPINGS = {
citation_title: "title",
citation_publication_date: "date", // perhaps this is still used in some old implementations
citation_cover_date: "date", // used e.g. by Springer http://link.springer.com/article/10.1023/A:1021669308832
citation_date: "date",
citation_journal_title: "publicationTitle",
citation_journal_abbrev: "journalAbbreviation",
citation_inbook_title: "publicationTitle", // used as bookTitle or proceedingTitle, e.g. http://pubs.rsc.org/en/content/chapter/bk9781849730518-00330/978-1-84973-051-8
citation_book_title: "bookTitle",
citation_volume: "volume",
citation_issue: "issue",
citation_series_title: "series",
citation_conference_title: "conferenceName",
citation_conference: "conferenceName",
citation_dissertation_institution: "university",
citation_technical_report_institution: "institution",
citation_technical_report_number: "number",
citation_publisher: "publisher",
citation_isbn: "ISBN",
citation_abstract: "abstractNote",
citation_doi: "DOI",
citation_public_url: "url",
citation_language: "language"
/* the following tag names are deliberately absent from this table because
they are handled separately in addHighwireMetadata()
"citation_author"
"citation_authors"
"citation_firstpage"
"citation_lastpage"
"citation_issn"
"citation_eIssn"
"citation_pdf_url"
"citation_abstract_html_url"
"citation_fulltext_html_url"
"citation_pmid"
"citation_online_date"
"citation_year"
"citation_keywords"
*/
};
/* eslint-enable */
// Maps actual prefix in use to URI
// The defaults are set to help out in case a namespace is not declared
// Copied from RDF translator
// Keys are lower-case prefixes. getPrefixes() adds to and overwrites this
// table in place with whatever the current page declares, so it is mutable
// module state rather than a constant.
var _prefixes = {
bib: "http://purl.org/net/biblio#",
bibo: "http://purl.org/ontology/bibo/",
dc: "http://purl.org/dc/elements/1.1/",
dcterms: "http://purl.org/dc/terms/",
prism: "http://prismstandard.org/namespaces/1.2/basic/",
foaf: "http://xmlns.com/foaf/0.1/",
vcard: "http://nwalsh.com/rdf/vCard#",
link: "http://purl.org/rss/1.0/modules/link/",
z: "http://www.zotero.org/namespaces/export#",
eprint: "http://purl.org/eprint/terms/",
eprints: "http://purl.org/eprint/terms/", // same URI as "eprint"; both spellings are accepted
og: "http://ogp.me/ns#", // Used for Facebook's OpenGraph Protocol
article: "http://ogp.me/ns/article#",
book: "http://ogp.me/ns/book#",
music: "http://ogp.me/ns/music#",
video: "http://ogp.me/ns/video#",
rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
};
// Corrections for namespace URIs as declared on pages: key is the URI as
// written, value is the URI to use instead. Looked up by remapPrefix() and
// replaced wholesale by setPrefixRemap().
var _prefixRemap = {
// DC should be in lower case
"http://purl.org/DC/elements/1.0/": "http://purl.org/dc/elements/1.0/",
"http://purl.org/DC/elements/1.1/": "http://purl.org/dc/elements/1.1/"
};
// Namespace map handed to ZU.xpath() by getContent(); starts out empty.
var namespaces = {};
// NOTE(review): neither variable is read or written in the part of the file
// visible here — presumably they track whether an item was found and its
// detected type; confirm against the rest of the file.
var _haveItem = false,
_itemType;
// NOTE(review): declared without a value and not assigned in the visible
// part of the file — presumably holds the RDF translator/sandbox; confirm.
var RDF;
// Optional extra meta-name => item-field map. Set by addCustomFields() and,
// when set, applied by completeItem() after all other metadata passes.
var CUSTOM_FIELD_MAPPINGS;
/**
 * Register an additional meta-name => item-field map.
 *
 * Stores the map in the module-level CUSTOM_FIELD_MAPPINGS, replacing any
 * map registered earlier (maps are not merged). completeItem() applies it
 * through processFields() with strict name matching.
 *
 * @param {Object} customFields - object whose keys are meta tag names and
 *   whose values are the item fields to fill
 */
function addCustomFields(customFields) {
CUSTOM_FIELD_MAPPINGS = customFields;
}
/**
 * Replace the namespace-URI correction table used by remapPrefix().
 *
 * The previous table, including the built-in DC lower-casing entries, is
 * discarded rather than merged with `map`.
 *
 * @param {Object} map - object mapping a URI as declared on a page to the
 *   URI that should be used instead
 */
function setPrefixRemap(map) {
_prefixRemap = map;
}
/**
 * Normalize a namespace URI through the `_prefixRemap` correction table.
 *
 * @param {String} uri - namespace URI as declared on the page
 * @return {String} the replacement URI when the table has a non-empty entry
 *   of its own for `uri`; otherwise `uri` unchanged
 */
function remapPrefix(uri) {
	// Only consult the table's own entries: a bare `_prefixRemap[uri]` lookup
	// also finds Object.prototype members, so an input such as "constructor"
	// or "toString" would come back as a function instead of a URI.
	if (Object.prototype.hasOwnProperty.call(_prefixRemap, uri) && _prefixRemap[uri]) {
		return _prefixRemap[uri];
	}
	return uri;
}
/**
 * Collect the namespace prefixes declared by the page into `_prefixes`.
 *
 * Two declaration styles are read:
 *  - `<link rel="schema.PREFIX" href="URI">` elements
 *  - `prefix="p1: URI1 p2: URI2"` attributes on the root and head elements
 *
 * Prefixes are stored lower-cased and every URI passes through
 * remapPrefix(). Existing entries in `_prefixes` with the same prefix are
 * overwritten.
 *
 * @param {Document} doc - page to inspect
 */
function getPrefixes(doc) {
	var links = doc.getElementsByTagName("link");
	for (let i = 0; i < links.length; i++) {
		let link = links[i];
		// Look for the schema's URI in our known schemata
		let rel = link.getAttribute("rel");
		if (!rel) continue;
		let matches = rel.match(/^schema\.([a-zA-Z]+)/);
		if (!matches) continue;
		let href = link.getAttribute("href");
		// A schema link without a target carries no URI; registering it would
		// replace a usable default with null
		if (!href) continue;
		_prefixes[matches[1].toLowerCase()] = remapPrefix(href);
	}

	// also look in html and head elements
	// The two attribute values must be joined with whitespace: glued together,
	// the last URI of the first would swallow the first prefix of the second
	// (e.g. "...ns#article: ..."), corrupting one mapping and losing the other.
	var prefixes = [doc.documentElement, doc.head]
		.map(elem => (elem && elem.getAttribute('prefix')) || '')
		.join(' ');
	var prefixRE = /(\w+):\s+(\S+)/g;
	var m;
	while ((m = prefixRE.exec(prefixes))) {
		let uri = remapPrefix(m[2]);
		Z.debug("Prefix '" + m[1].toLowerCase() + "' => '" + uri + "'");
		_prefixes[m[1].toLowerCase()] = uri;
	}
}
/**
 * Read the value of `<meta name="...">` tags located directly under
 * html > head. The value is taken from the `content` attribute, falling back
 * to the non-standard `contents` attribute.
 *
 * Boolean parameters (default values false)
 *  - strict = false: compare only ending substring, e.g. bepress
 *  - strict = true: compare exactly
 *  - all = false: return only first match
 *  - all = true: concatenate all values
 *
 * @param {Document} doc
 * @param {String} name - meta tag name (or name suffix when not strict)
 * @param {Boolean} [strict]
 * @param {Boolean} [all]
 * @return {String|null} with `all`, the non-empty values joined by ", "
 *   (empty string when there are none); otherwise the value of the first
 *   matching tag, or null when there is none
 */
function getContentText(doc, name, strict, all) {
	let csspath = 'html>head>meta[name' + (strict ? '="' : '$="') + name + '"]';
	if (all) {
		return Array.from(doc.querySelectorAll(csspath))
			// Read attributes, exactly like the single-value branch below:
			// `contents` is not exposed as a DOM property, so a property read
			// would never see it.
			.map(meta => meta.getAttribute('content') || meta.getAttribute('contents'))
			// Tags without a value must not leave stray ", " separators behind
			.filter(Boolean)
			.join(', ');
	}
	return attr(doc, csspath, 'content') || attr(doc, csspath, 'contents');
}
/**
 * Select the `content` and `contents` attributes of html > head > meta tags
 * by name, using XPath with the module-level `namespaces` map.
 *
 * @param {Document} doc
 * @param {String} name - meta tag name (or name suffix when not strict)
 * @param {Boolean} [strict] - true: @name must equal `name`;
 *   false: @name only has to end with `name`
 * @return whatever ZU.xpath() returns for the combined expression
 */
function getContent(doc, name, strict) {
	var nameExpr;
	if (strict) {
		nameExpr = '@name';
	}
	else {
		// Suffix match: compare the substring of @name that starts
		// name.length characters before its end
		nameExpr = 'substring(@name, string-length(@name)-' + (name.length - 1) + ')';
	}
	var metaPath = '/x:html/x:head/x:meta[' + nameExpr + '="' + name + '"]/';
	var attrPaths = [metaPath + '@content', metaPath + '@contents'];
	return ZU.xpath(doc, attrPaths.join(' | '), namespaces);
}
/**
 * Re-capitalize a name that is written entirely in one case.
 *
 * @param {String} authorName
 * @return {String} the result of ZU.capitalizeTitle(authorName, true) when
 *   the name equals its own upper-case or lower-case form; otherwise the
 *   name unchanged
 */
function fixCase(authorName) {
	const isAllUpper = authorName.toUpperCase() === authorName;
	const isAllLower = authorName.toLowerCase() === authorName;
	// Mixed-case names are assumed to be capitalized on purpose
	if (!isAllUpper && !isAllLower) {
		return authorName;
	}
	return ZU.capitalizeTitle(authorName, true);
}
/**
 * Fill item fields from meta tags according to a name => field map.
 *
 * For every key of `fieldMap` the matching meta tag value is fetched with
 * getContentText(); values that are empty or whitespace-only are skipped,
 * anything else is normalized with ZU.trimInternal() and written to the
 * mapped field, overwriting whatever was there.
 *
 * @param {Document} doc
 * @param {Object} item - object that receives the field values
 * @param {Object} fieldMap - meta tag name => item field name
 * @param {Boolean} [strict] - forwarded to getContentText()
 */
function processFields(doc, item, fieldMap, strict) {
	for (let metaName in fieldMap) {
		const targetField = fieldMap[metaName];
		// ISSN and ISBN gather the values of every matching tag;
		// all other fields use the first match only
		const joinAll = (targetField == "ISSN" || targetField == "ISBN");
		const raw = getContentText(doc, metaName, strict, joinAll);
		const isBlank = !raw || !raw.trim();
		if (isBlank) continue;
		item[targetField] = ZU.trimInternal(raw);
	}
}
/**
 * Run every metadata pass over `newItem` and then finish it.
 *
 * The passes run in a fixed order: Highwire metadata, other metadata,
 * low-quality metadata, final cleanup, and finally any custom field
 * mappings registered through addCustomFields().
 *
 * @param {Document} doc - page the item is built from
 * @param {Object} newItem - item being built; mutated in place and completed
 * @param hwType - forwarded unchanged to addHighwireMetadata()
 *   NOTE(review): presumably the item type derived from Highwire tags — confirm
 */
function completeItem(doc, newItem, hwType) {
// Strip off potential junk from RDF
newItem.seeAlso = [];
addHighwireMetadata(doc, newItem, hwType);
addOtherMetadata(doc, newItem);
addLowQualityMetadata(doc, newItem);
finalDataCleanup(doc, newItem);
// Custom mappings run last and with strict name matching; processFields()
// overwrites fields, so they take precedence over the passes above
if (CUSTOM_FIELD_MAPPINGS) {
processFields(doc, newItem, CUSTOM_FIELD_MAPPINGS, true);
}
newItem.complete();
}
/**
 * Detection entry point.
 *
 * @param {Document} doc
 * @param {String} url
 * @return {Boolean|String|undefined} false for the WordPress Jetpack comment
 *   frame; `exports.itemType` when it is set; otherwise undefined after
 *   handing off to init() with Zotero.done as the callback
 */
// eslint-disable-next-line consistent-return
function detectWeb(doc, url) {
	// blacklist wordpress jetpack comment plugin so it doesn't override other metadata
	const isJetpackCommentFrame = url.includes("jetpack.wordpress.com/jetpack-comment/");
	if (isJetpackCommentFrame) {
		return false;
	}

	// An item type already set on exports is returned as-is
	const presetType = exports.itemType;
	if (presetType) {
		return presetType;
	}

	init(doc, url, Zotero.done);
}
function init(doc, url, callback, forceLoadRDF) {
getPrefixes(doc);
var metaTags = doc.head.getElementsByTagName("meta");
Z.debug("Embedded Metadata: found " + metaTags.length + " meta tags.");
if (forceLoadRDF /* check if this is called from doWeb */ && !metaTags.length) {
if (doc.head) {
Z.debug(doc.head.innerHTML
.replace(/