// Name of the interface language currently in effect. Reassigned by initLang()
// and used as the key into fieldNames when localizing field labels.
var language = "English";
// Reverse field map (localized label -> English label). Rebuilt by initLang()
// and consulted by scrape() to translate row labels.
var L = {};
// Set by getSearchResults(): whether the first search result contains an
// element with the 'ebraryitem' class.
var isEbrary = false;
/**
 * Returns the text content of the indexing fields with the given names.
 *
 * Field names are given in English and are localized through
 * fieldNames[language] before the page is queried. For each field only the
 * first matching data cell is used.
 *
 * Note: a field that is not present on the page contributes nothing to the
 * result, so the output follows the order of `fields` but may be shorter.
 *
 * @param {Document} doc - page to search
 * @param {string|string[]} fields - one field name or an array of field names
 * @returns {string[]} text content of the first data cell of every field found
 */
function getTextValue(doc, fields) {
	// Array.isArray instead of `typeof ... != 'object'`, so that null/undefined
	// are wrapped rather than crashing on `.map`
	const names = Array.isArray(fields) ? fields : [fields];

	// localize fields; fall back to the English name when no translation exists
	const translations = fieldNames[language];
	const localized = names.map(function (field) {
		return (translations && translations[field]) || field;
	});

	const allValues = [];
	for (const name of localized) {
		const cells = ZU.xpath(doc,
			'//div[@class="display_record_indexing_fieldname" and normalize-space(text())="' + name
			+ '"]/following-sibling::div[@class="display_record_indexing_data"][1]');

		// only the first matching cell is of interest
		if (cells.length) allValues.push(cells[0].textContent);
	}

	return allValues;
}
/**
 * Initializes the field map translations for the page's interface language.
 *
 * Reads the language label from the link that contains the "uxf-globe" span.
 * For a non-English label it sets the global `language` and rebuilds the
 * reverse map `L` (localized label -> English label) from fieldNames.
 * Otherwise it resets `language` to 'English' and empties `L`.
 *
 * @param {Document} doc - page whose language selector is inspected
 * @returns {void}
 */
function initLang(doc) {
	const label = ZU.xpathText(doc, '//a[span[contains(@class,"uxf-globe")]]');
	const lang = label ? label.trim() : '';

	if (!lang || lang === "English") {
		language = 'English';
		L = {};
		return;
	}

	// if already initialized, don't need to do anything else
	if (lang === language) return;

	language = lang;

	// build reverse field map; a language without a fieldNames entry simply
	// yields an empty map
	L = {};
	for (const [english, localized] of Object.entries(fieldNames[language] || {})) {
		L[localized] = english;
	}
}
/**
 * Collects the search results shown on a result-list page.
 *
 * Side effect: sets the global `isEbrary` according to whether the first
 * result contains an element with the 'ebraryitem' class.
 *
 * @param {Document} doc - search results page
 * @param {boolean} [checkOnly] - when true, return `true` as soon as one
 *   usable result is found instead of building the full list
 * @param {Object} [extras] - optional output map; for Ebrary results in the
 *   bookmarklet it receives `{ html, title, url }` per result, keyed by URL
 * @returns {Object|boolean} map of URL -> title (or `{ title, checked }` when
 *   the result has a 'marked_list_checkbox'), `true` in checkOnly mode, or
 *   `false` when nothing was found
 */
function getSearchResults(doc, checkOnly, extras) {
	// Several result containers may exist; use the first one that is actually
	// rendered. NOTE(review): the loop-exit statement was missing from the
	// damaged source; "first visible container" is assumed here - confirm.
	const containers = Array.from(doc.getElementsByClassName('resultListContainer'));
	const root = containers.find(function (container) {
		return container && container.offsetHeight > 0;
	});

	if (!root) {
		Z.debug("No root found");
		return false;
	}

	const results = root.getElementsByClassName('resultItem');
	// if the first result is Ebrary, they all are - we're looking at the Ebrary results tab
	isEbrary = Boolean(results && results[0]
		&& results[0].getElementsByClassName('ebraryitem').length > 0);

	const items = {};
	let found = false;
	for (let i = 0, n = results.length; i < n; i++) {
		const result = results[i];
		const title = result.querySelectorAll('h3 a')[0];
		if (!title || !title.href) continue;

		if (checkOnly) return true;
		found = true;

		let item = ZU.trimInternal(title.textContent);
		const preselect = result.getElementsByClassName('marked_list_checkbox')[0];
		if (preselect) {
			// carry over the page's own selection state
			item = {
				title: item,
				checked: preselect.checked
			};
		}

		items[title.href] = item;

		// `extras` is optional, so guard before writing into it
		if (extras && isEbrary && Zotero.isBookmarklet) {
			extras[title.href] = {
				html: result,
				title: item,
				url: title.href
			};
		}
	}

	return found ? items : false;
}
/**
 * Determines what kind of item (if any) the page represents.
 *
 * @param {Document} doc - current page
 * @param {string} url - URL of the page
 * @returns {string|boolean} 'multiple' for a result list with usable results,
 *   an item type for a single record, or `false` when nothing is detected
 */
function detectWeb(doc, url) {
	// The localized field map must be in place before getTextValue() is used.
	// NOTE(review): no call to initLang() survived in the visible source; it is
	// placed here because this is the first entry point - confirm.
	initLang(doc);

	// Check for multiple first
	if (!url.includes('docview') && !url.includes('pagepdf')) {
		return getSearchResults(doc, true) ? 'multiple' : false;
	}

	// if we are on Abstract/Details page,
	// then we can read the type from the corresponding field
	const types = getTextValue(doc, ["Source type", "Document type", "Record type"]);
	const zoteroType = getItemType(types);
	if (zoteroType) return zoteroType;

	// hack for NYTs, which misses crucial data.
	const db = getTextValue(doc, "Database")[0];
	if (db && db.includes("The New York Times")) {
		return "newspaperArticle";
	}

	// there is not much information about the item type in the pdf/fulltext page
	const titleRow = text(doc, '.open-access');
	if (titleRow && !text(doc, '.ol-login-link')) {
		// Fall back on journalArticle - even if we couldn't guess the type
		return getItemType([titleRow]) || "journalArticle";
	}
	return false;
}
/**
 * Translator entry point: saves the item(s) represented by the page.
 *
 * For a result list, the user picks entries and each one is processed in
 * turn. For a single record, the page is scraped directly when the
 * Abstract/Details tab is active (or absent); otherwise the Abstract tab is
 * loaded once and scraped.
 *
 * @param {Document} doc - current page
 * @param {string} url - URL of the page
 * @param {boolean} [noFollow] - true when called on a page reached by
 *   following the Abstract tab link; prevents following it again
 * @returns {void}
 * @throws {Error} when the Abstract tab is inactive but contains no link
 */
function doWeb(doc, url, noFollow) {
	const type = detectWeb(doc, url);

	if (type === "multiple") {
		// detect web returned multiple
		const resultData = {};
		Zotero.selectItems(getSearchResults(doc, false, resultData), function (items) {
			if (!items) return;

			// selectItems hands back a map keyed by URL
			const articles = Object.keys(items);

			if (!isEbrary) {
				ZU.processDocuments(articles, doWeb);
				return;
			}

			if (Zotero.isBookmarklet) {
				// The bookmarklet can't use the ebrary translator, so work from
				// the data captured on the result list itself
				const refs = articles.map(function (href) {
					return resultData[href];
				});
				scrapeEbraryResults(refs);
				return;
			}

			ZU.processDocuments(articles, function (doc) {
				const translator = Zotero.loadTranslator("web");
				// NOTE(review): the statements configuring this translator were
				// missing from the damaged source. The ID below is reconstructed
				// and presumed to be the ebrary translator - confirm against
				// that translator's header before shipping.
				translator.setTranslator("2abe2519-2f0a-48c0-ad3a-b87b9c059459");
				translator.setDocument(doc);
				translator.translate();
			});
		});
		return;
	}

	const abstractTab = doc.getElementById('tab-AbstractRecord-null') // Seems like that null is a bug and it might change at some point
		|| doc.getElementById('tab-Record-null'); // Shown as Details

	// No such tab, or the tab is the active one: the metadata is on this page
	if (!abstractTab || abstractTab.classList.contains('active')) {
		Zotero.debug("On Abstract page, scraping");
		scrape(doc, url, type);
		return;
	}

	if (noFollow) {
		Z.debug('Not following link again. Attempting to scrape');
		scrape(doc, url, type);
		return;
	}

	const link = abstractTab.getElementsByTagName('a')[0];
	if (!link) {
		throw new Error("Could not find the abstract/metadata link");
	}
	Zotero.debug("Going to the Abstract tab");
	ZU.processDocuments(link.href, function (doc, url) {
		doWeb(doc, url, true);
	});
}
/**
 * Builds a Zotero item from the indexing rows of a record page and saves it.
 *
 * NOTE(review): the source this was rebuilt from had lost its closing braces,
 * `break` statements and several assignment targets. The targets restored
 * below (item.university, item.place, item.date, item.creators,
 * item.attachments) are inferred from the surrounding code - confirm.
 *
 * @param {Document} doc - record page
 * @param {string} url - URL of the record page
 * @param {string} type - Zotero item type to create
 * @returns {void}
 */
function scrape(doc, url, type) {
	const item = new Zotero.Item(type);

	// get all rows
	const rows = doc.getElementsByClassName('display_record_indexing_row');

	const dates = [];
	const place = {};
	const altKeywords = [];

	for (let i = 0, n = rows.length; i < n; i++) {
		const labelNode = rows[i].childNodes[0];
		const valueNode = rows[i].childNodes[1];
		if (!labelNode || !valueNode) continue;

		const label = labelNode.textContent.trim();
		let value = valueNode.textContent.trim(); // trimInternal?

		// translate label
		const enLabel = L[label] || label;

		switch (enLabel) {
			case 'Title':
				if (value == value.toUpperCase()) value = ZU.capitalizeTitle(value, true);
				item.title = value;
				break;
			case 'Author':
			case 'Editor': { // test case?
				const creatorType = (enLabel == 'Author') ? 'author' : 'editor';

				// Use titles of a tags if they exist, since these don't include
				// affiliations
				value = ZU.xpathText(valueNode, "a/@title", null, "; ") || value;

				// NOTE(review): the split pattern is reconstructed. ";" matches
				// the separator used in the xpathText call above; the " and "
				// alternative is an assumption - confirm.
				const names = value.replace(/^by\s+/i, '') // sometimes the authors begin with "By"
					.split(/\s*;\s*|\s+and\s+/i);

				for (let j = 0, m = names.length; j < m; j++) {
					// TODO: might have to detect proper creator type from item type
					item.creators.push(
						ZU.cleanAuthor(names[j], creatorType, names[j].includes(',')));
				}
				break;
			}
			case 'Publication title':
				item.publicationTitle = value.replace(/;.+/, "");
				break;
			case 'Volume':
				item.volume = value;
				break;
			case 'Issue':
				item.issue = value;
				break;
			case 'Number of pages':
				item.numPages = value;
				break;
			case 'ISSN':
				item.ISSN = value;
				break;
			case 'ISBN':
				item.ISBN = value;
				break;
			case 'DOI': // test case?
				item.DOI = value;
				break;
			case 'Copyright':
				item.rights = value;
				break;
			case 'Language of publication':
			case 'Language':
				item.language = value;
				break;
			case 'Section':
				item.section = value;
				break;
			case 'Pages':
				item.pages = value;
				break;
			case 'First page':
				// temporary value, combined with numPages after the loop
				item.firstPage = value;
				break;
			case 'University/institution':
			case 'School':
				item.university = value;
				break;
			case 'Degree':
				item.thesisType = value;
				break;
			case 'Publisher':
				item.publisher = value;
				break;

			case 'Identifier / keyword':
				item.tags = value.split(/\s*(?:,|;)\s*/);
				break;
			// alternative tags, only used when no identifiers/keywords are given
			case 'Subject':
			case 'Journal subject':
			case 'Publication subject':
				altKeywords.push(value);
				break;

			// we'll figure out proper location later
			case 'University location':
			case 'School location':
				place.schoolLocation = value;
				break;
			case 'Place of publication':
				place.publicationPlace = value;
				break;
			case 'Country of publication':
				place.publicationCountry = value;
				break;

			// multiple dates are provided
			// more complete dates are preferred, hence the higher index
			case 'Publication date':
				dates[2] = value;
				break;
			case 'Publication year':
				dates[1] = value;
				break;
			case 'Year':
				dates[0] = value;
				break;

			// we know about these, skip
			case 'Source type':
			case 'Document type':
			case 'Record type':
			case 'Database':
				break;

			default:
				Z.debug('Unhandled field: "' + label + '": ' + value);
		}
	}

	// drop the account id from the URL, and a query string left empty by that
	item.url = url.replace(/\baccountid=[^&#]*&?/, '').replace(/\?(?:#|$)/, '');

	if (item.itemType == "thesis" && place.schoolLocation) {
		item.place = place.schoolLocation;
	}
	else if (place.publicationPlace) {
		item.place = place.publicationPlace;
		if (place.publicationCountry) {
			item.place = item.place + ', ' + place.publicationCountry.replace(/,.+/, "");
		}
	}

	// the last (highest-index) entry is the most complete date available
	item.date = dates.pop();

	// Sometimes we can get first page and num pages for a journal article
	if (item.firstPage && !item.pages) {
		const firstPage = parseInt(item.firstPage, 10);
		const numPages = parseInt(item.numPages, 10);
		if (!numPages || numPages < 2) {
			item.pages = item.firstPage;
		}
		else {
			// the range separator was an empty string in the source, which
			// fused the two numbers (5 and 8 became "58")
			item.pages = firstPage + '-' + (firstPage + numPages - 1);
		}
	}

	// sometimes number of pages ends up in pages
	if (!item.numPages) item.numPages = item.pages;

	// don't override the university with a publisher information for a thesis
	if (item.itemType == "thesis" && item.university && item.publisher) {
		delete item.publisher;
	}

	// language is sometimes given as full word and abbreviation
	if (item.language) item.language = item.language.split(/\s*;\s*/)[0];

	// parse some data from the byline in case we're missing publication title
	// or the date is not complete
	const byline = ZU.xpath(doc, '//span[contains(@class, "titleAuthorETC")][last()]');
	// add publication title if we don't already have it
	if (!item.publicationTitle
		&& ZU.fieldIsValidForType('publicationTitle', item.itemType)) {
		const pubTitle = ZU.xpathText(byline, './/a[@id="lateralSearch"]');
		// remove date range
		if (pubTitle) item.publicationTitle = pubTitle.replace(/\s*\(.+/, '');
	}

	let date = ZU.xpathText(byline, './text()');
	if (date) date = date.match(/]\s+(.+?):/);
	if (date) date = date[1];
	// add date if we only have a year and date is longer in the byline
	if (date
		&& (!item.date
			|| (item.date.length <= 4 && date.length > item.date.length))) {
		item.date = date;
	}

	// NOTE(review): the paragraph joiner was lost in the source; newline assumed
	item.abstractNote = ZU.xpath(doc, '//div[contains(@id, "abstractSummary_")]/p')
		.map(function (p) {
			return ZU.trimInternal(p.textContent);
		})
		.join('\n');

	if (!item.tags.length && altKeywords.length) {
		item.tags = altKeywords.join(',').split(/\s*(?:,|;)\s*/);
	}

	const pdfLink = doc.getElementById('downloadPDFLink');
	if (pdfLink) {
		item.attachments.push({
			title: 'Full Text PDF',
			url: pdfLink.href,
			mimeType: 'application/pdf',
			proxy: false
		});
	}
	else {
		const fullText = ZU.xpath(doc, '//li[@id="tab-Fulltext-null"]/a')[0];
		if (fullText) {
			item.attachments.push({
				title: 'Full Text Snapshot',
				url: fullText.href,
				mimeType: 'text/html'
			});
		}
	}

	// NOTE(review): no completion call survived in the source; without one the
	// item would never be saved - confirm.
	item.complete();
}
/**
 * Maps source/document type labels to a Zotero item type.
 *
 * Labels are checked in order. Most matches are returned immediately; a few
 * vague ones ("book", "website", "pamphlet", "encyclopedia") are only kept as
 * a guess and are returned when no later label yields a definite type.
 *
 * @param {string[]} types - type labels found on the page
 * @returns {string|undefined} Zotero item type, or undefined when nothing matches
 */
function getItemType(types) {
	let guessType;
	for (let i = 0, n = types.length; i < n; i++) {
		// entries that are not strings cannot be matched; skip instead of throwing
		if (typeof types[i] !== 'string') continue;

		// put the testString to lowercase and test for singular only for maximal compatibility
		// in most cases we just can return the type, but sometimes only save it as a guess and will use it only if we don't have anything better
		const testString = types[i].toLowerCase();
		if (testString.includes("journal") || testString.includes("periodical")) {
			// "Scholarly Journals", "Trade Journals", "Historical Periodicals"
			return "journalArticle";
		}
		else if (testString.includes("newspaper") || testString.includes("wire feed")) {
			// "Newspapers", "Wire Feeds", "WIRE FEED", "Historical Newspapers"
			return "newspaperArticle";
		}
		else if (testString.includes("dissertation")) {
			// "Dissertations & Theses", "Dissertation/Thesis", "Dissertation"
			return "thesis";
		}
		else if (testString.includes("chapter")) {
			// "Chapter"
			return "bookSection";
		}
		else if (testString.includes("book")) {
			// "Book, Authored Book", "Book, Edited Book", "Books"
			guessType = "book";
		}
		else if (testString.includes("conference paper")) {
			// "Conference Papers and Proceedings", "Conference Papers & Proceedings"
			return "conferencePaper";
		}
		else if (testString.includes("magazine")) {
			// "Magazines"
			return "magazineArticle";
		}
		else if (testString.includes("report")) {
			// "Reports", "REPORT"
			return "report";
		}
		else if (testString.includes("website")) {
			// "Blogs, Podcasts, & Websites"
			guessType = "webpage";
		}
		else if (testString == "blog" || testString == "article in an electronic resource or web site") {
			// "Blog", "Article In An Electronic Resource Or Web Site"
			return "blogPost";
		}
		else if (testString.includes("patent")) {
			// "Patent"
			return "patent";
		}
		else if (testString.includes("pamphlet")) {
			// Pamphlets & Ephemeral Works
			guessType = "manuscript";
		}
		else if (testString.includes("encyclopedia")) {
			// "Encyclopedias & Reference Works"
			guessType = "encyclopediaArticle";
		}
		else if (testString.includes("statute")) {
			return "statute";
		}
	}

	// nothing definite was found; return the best guess (possibly undefined)
	return guessType;
}
/**
 * Creates book items for Ebrary results directly from the result list.
 *
 * Used when the individual result pages cannot be visited, so only the data
 * present in the list entry is available.
 *
 * NOTE(review): the expression producing `visibleData` was truncated in the
 * source (only the class name 'results_list_copy' and the callback survived).
 * Reading the first such element under `ref.html` is an assumption - confirm.
 *
 * @param {Array<{html: Element, title: (string|{title: string}), url: string}>} refs
 *   entries as collected in the `extras` map of getSearchResults()
 * @returns {void}
 */
function scrapeEbraryResults(refs) {
	// Since we can't chase URLs, let's get what we can from the page
	for (let i = 0; i < refs.length; i++) {
		const ref = refs[i];
		const hiddenData = ZU.xpathText(ref.html, './span') || '';

		// The text returned by textContent is of the following format:
		// book title \n author, first; [author, second; ...;] publisher name; publisher location (date) \n
		// Reversed, index 0 is "location (date)", index 1 the publisher and the
		// remaining entries are the authors in reverse order.
		const copyNode = ref.html.getElementsByClassName('results_list_copy')[0];
		const copyMatch = copyNode ? /\n(.*)\n?/.exec(copyNode.textContent) : null;
		const visibleData = copyMatch ? copyMatch[1].split(';').reverse() : [];
		const locationAndDate = visibleData[0] || '';

		const item = new Zotero.Item("book");
		const date = /\(([\w\s]+)\)/.exec(locationAndDate);
		const place = /([\w,\s]+)\(/.exec(locationAndDate);
		const isbn = /isbn,\svalue\s=\s'([\dX]+)'/i.exec(hiddenData);
		const languageCode = /language_code,\svalue\s=\s'([A-Za-z]+)'\n/i.exec(hiddenData);
		const numPages = /page_count,\svalue\s=\s'(\d+)'\n/i.exec(hiddenData);
		const locNum = /lccn,\svalue\s=\s'([-.\s\w]+)'\n/i.exec(hiddenData);

		// getSearchResults() stores { title, checked } instead of a plain
		// string when the result has a selection checkbox
		item.title = (ref.title && typeof ref.title === 'object') ? ref.title.title : ref.title;
		item.url = ref.url;

		if (date) {
			item.date = date[1];
		}
		if (place) {
			item.place = place[1].trim();
		}
		if (visibleData[1]) {
			item.publisher = visibleData[1].trim();
		}

		// Push the authors in reverse to restore the original order
		for (let j = visibleData.length - 1; j >= 2; j--) {
			item.creators.push(ZU.cleanAuthor(visibleData[j], "author", true));
		}

		if (isbn) {
			item.ISBN = isbn[1];
		}
		if (languageCode) {
			item.language = languageCode[1];
		}
		if (numPages) {
			item.numPages = numPages[1];
		}
		if (locNum) {
			item.callNumber = locNum[1];
		}

		// NOTE(review): no completion call survived in the source - confirm.
		item.complete();
	}
}
// localized field names
