{ "translatorID": "8efcb7cb-4180-4555-969a-08e8b34066c4", "label": "Trove", "creator": "Tim Sherratt", "target": "^https?://trove\\.nla\\.gov\\.au/(?:newspaper|gazette|work|book|article|picture|music|map|collection)/", "minVersion": "3.0", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", "lastUpdated": "2020-09-15 01:24:23" } /* Trove Translator Copyright (C) 2016 Tim Sherratt (tim@discontents.com.au, @wragge) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ function detectWeb(doc, url) { // Note that the url for search results has changed, // so the first pattern will never match. // However, results scraping needs to be rewritten due to the redesign, // so leave this as is for now. if (url.includes('/result?') || url.includes('/newspaper/page')) { return getSearchResults(doc, url, true) ? 'multiple' : false; } else if (url.includes('/newspaper/article')) { return "newspaperArticle"; } // Scraping from works is very brokened due to site redesign // Prevent detection until a fix is available // else if (url.includes('/work/')) { // return "book"; // } return false; } function getSearchResults(doc, url, checkOnly) { var items = {}; var results; var found = false; if (url.includes('/result?')) { results = ZU.xpath(doc, "//div[@id='mainresults']//li/dl/dt/a"); } else { results = ZU.xpath(doc, "//ol[@class='list-unstyled articles']/li/h4/a"); } for (var i = 0; i < results.length; i++) { var link = results[i].href; var title = ZU.trimInternal(results[i].textContent); if (!title || !link) continue; if (checkOnly) return true; found = true; items[link] = title; } return found ? items : false; } function doWeb(doc, url) { if (detectWeb(doc, url) == "multiple") { Zotero.selectItems(getSearchResults(doc, url), function (items) { if (!items) return; for (var i in items) { scrape(null, i); } }); } else { scrape(doc, url); } } function scrape(doc, url) { if (url.includes('/newspaper/article/')) { scrapeNewspaper(doc, url); } else { scrapeWork(doc, url); } } function scrapeNewspaper(doc, url) { var articleID = url.match(/newspaper\/article\/(\d+)/)[1]; var bibtexURL = "http://trove.nla.gov.au/newspaper/citations/bibtex-article-" + articleID + ".bibtex"; ZU.HTTP.doGet(bibtexURL, function (bibtex) { var translator = Zotero.loadTranslator("import"); translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4"); translator.setString(bibtex); // Clean up the BibTex results and add some extra stuff. translator.setHandler("itemDone", function (obj, item) { item.itemType = 'newspaperArticle'; item.pages = item.numPages; delete item.numPages; delete item.type; delete item.itemID; // doc is null during multiple call if (doc) { item.abstractNote = ZU.xpathText(doc, "//meta[@property='og:description']/@content"); // Add tags var tags = ZU.xpath(doc, "//ul[contains(@class,'nlaTagContainer')]/li"); for (let tag of tags) { tag = ZU.xpathText(tag, "div/a[not(contains(@class,'anno-remove'))]"); item.tags.push(tag); } } // I've created a proxy server to generate the PDF and return the URL without locking up the browser. var proxyURL = "https://trove-proxy.herokuapp.com/pdf/" + articleID; ZU.doGet(proxyURL, function (pdfURL) { // With the last argument 'false' passed to doGet // we allow all status codes to continue and reach // the item.complete() command. if (pdfURL.startsWith('http')) { item.attachments.push({ url: pdfURL, title: 'Trove newspaper PDF', mimeType: 'application/pdf' }); } else { Zotero.debug("No PDF because unexpected return from trove-proxy " + proxyURL); Zotero.debug(pdfURL); } // Get the OCRd text and save in a note. var textURL = "http://trove.nla.gov.au/newspaper/rendition/nla.news-article" + articleID + ".txt"; ZU.HTTP.doGet(textURL, function (text) { item.notes.push({ note: text.trim() }); item.complete(); }); }, null, null, null, false); }); translator.translate(); }); } var troveTypes = { Book: "book", "Article Article/Book chapter": "bookSection", Thesis: "thesis", "Archived website": "webpage", "Conference Proceedings": "book", "Audio book": "book", Article: "journalArticle", "Article Article/Journal or magazine article": "journalArticle", "Article Article/Conference paper": "conferencePaper", "Article Article/Report": "report", Photograph: "artwork", "Poster, chart, other": "artwork", "Art work": "artwork", Object: "artwork", "Microform Photograph": "artwork", "Microform Object": "artwork", Sound: "audioRecording", Video: "videoRecording", "Printed music": "book", Map: "map", Unpublished: "manuscript", Published: "document" }; // The function ... function checkType(string) { var types = string.split("; "); var newString; while (types.length > 0) { newString = types.join(" "); if (troveTypes.hasOwnProperty(newString)) { return troveTypes[newString]; } types.pop(); } return "book"; } // Sometimes authors are a little messy and we need to clean them // e.g. author = { Bayley, William A. (William Alan), 1910-1981 }, // results in // "firstName": "1910-1981, William A. (William Alan)", // "lastName": "Bayley" function cleanCreators(creators) { for (var i = 0; i < creators.length; i++) { var name = creators[i].firstName; name = name.replace(/\(?\d{4}-\d{0,4}\)?,?/, "").trim(); var posParenthesis = name.indexOf("("); if (posParenthesis > -1) { var first = name.substr(0, posParenthesis); var second = name.substr(posParenthesis + 1, name.length - posParenthesis - 2); if (second.includes(first.replace('.', '').trim())) { name = second; } else { name = first; } } creators[i].firstName = name.trim(); } return creators; } function scrapeWork(doc, url) { var thumbnailURL; // Remove all params from url var workURL = url.replace(/[?#].*/, ''); var bibtexURL = workURL + '?citationFormat=BibTeX'; if (doc) { // Need to get version identifier for the BibText url var versionID = doc.body.innerHTML.match(/displayCiteDialog\('(.+?)'/); if (versionID !== null) { bibtexURL += '&selectedversion=' + versionID[1]; thumbnailURL = ZU.xpathText(doc, "//a/img[@class='mosaic ui-shdw']/@src"); } else { // It's a work -- so thumbnails are different thumbnailURL = ZU.xpathText(doc, "//li[@class='imgfirst']//img/@src"); } } // Get the BibTex and feed it to the translator. ZU.HTTP.doGet(bibtexURL, function (bibtex) { var translator = Zotero.loadTranslator("import"); translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4"); translator.setString(bibtex); translator.setHandler("itemDone", function (obj, item) { item.itemType = checkType(item.type); item.creators = cleanCreators(item.creators); // Attach a link to the contributing repository if available if (item.hasOwnProperty('url')) { item.attachments.push({ title: "Record from contributing repository", url: item.url, mimeType: 'text/html', snapshot: false }); } if (doc) { // This gives a better version-aware url. item.url = ZU.xpathText(doc, "//meta[@property='og:url']/@content"); item.abstractNote = ZU.xpathText(doc, "//meta[@property='og:description']/@content"); // Add tags let tags = ZU.xpath(doc, "//div[@id='tagswork' or @id='content-tags']/ul/li"); for (var i = 0; i < tags.length; i++) { let tag = ZU.xpathText(tags[i], "a"); item.tags.push(tag); } } if (thumbnailURL !== null) { item.attachments.push({ url: thumbnailURL, title: 'Trove thumbnail image', mimeType: 'image/jpeg' }); } item.complete(); }); translator.translate(); }); } /** BEGIN TEST CASES **/ var testCases = [ { "type": "web", "url": "https://trove.nla.gov.au/work/9958833?q&versionId=11567057", "items": [ { "itemType": "book", "title": "Experiences of a meteorologist in South Australia", "creators": [ { "firstName": "Clement Lindley", "lastName": "Wragge", "creatorType": "author" } ], "date": "1980", "ISBN": "9780908065073", "abstractNote": "In 14 libraries. 24 p. : ill. ; 22 cm. Wragge, Clement L. (Clement Lindley), 1852-1922. South Australia. Climate, 1883-1884. Meteorologists -- South Australia -- Biography. South Australia -- Description and travel. South Australia -- Climate -- History.", "itemID": "trove.nla.gov.au/work/9958833", "language": "English", "libraryCatalog": "Trove", "publisher": "Warradale, S.Aust. : Pioneer Books", "url": "https://trove.nla.gov.au/version/11567057", "attachments": [], "tags": [], "notes": [ { "note": "

Reprinted from Good words for 1887/ edited by Donald Macleod, published: London: Isbister and Co

" } ], "seeAlso": [] } ] }, { "type": "web", "url": "https://trove.nla.gov.au/newspaper/article/70068753", "items": [ { "itemType": "newspaperArticle", "title": "'WRAGGE.'", "creators": [], "date": "7 Feb 1903", "abstractNote": "We have received a copy of the above which is a journal devoted chiefly to the science of meteorology. It is owned and conducted by Mr. Clement ...", "libraryCatalog": "Trove", "pages": "4", "place": "Vic.", "publicationTitle": "Sunbury News (Vic. : 1900 - 1927)", "url": "http://nla.gov.au/nla.news-article70068753", "attachments": [ { "title": "Trove newspaper PDF", "mimeType": "application/pdf" } ], "tags": [ { "tag": "Meteorology Journal - Clement Wragge" } ], "notes": [ { "note": "\n \n 07 Feb 1903 - 'WRAGGE.'\n \n \n

Sunbury News (Vic. : 1900 - 1927), Saturday 7 February 1903, page 4

\n
\n

'WRAGGE' - we have received a copy of the above, which is a journal devoted chiefly to the science of meteorology. It is owned and conducted by Mr. Clement Wragge.

\n \n" } ], "seeAlso": [] } ] }, { "type": "web", "url": "http://trove.nla.gov.au/newspaper/result?l-australian=y&q=wragge", "items": "multiple" }, { "type": "web", "url": "http://trove.nla.gov.au/book/result?l-australian=y&q=wragge", "items": "multiple" }, { "type": "web", "url": "https://trove.nla.gov.au/newspaper/page/7013947", "items": "multiple" }, { "type": "web", "url": "https://trove.nla.gov.au/work/9531118?q&sort=holdings+desc&_=1483112824975&versionId=14744047", "items": [ { "itemType": "book", "title": "Lithgow zig zag railway, Blue Mountains, New South Wales", "creators": [ { "firstName": "William Alan", "lastName": "Bayley", "creatorType": "author" } ], "date": "1969", "abstractNote": "In 19 libraries. 40 p. : ill., map ; 22 cm. Great Zig Zag Railway (Lithgow, N.S.W.) Railroads -- Blue Mountains (N.S.W. : Mountains) Zig Zag Railway -- Lithgow, Australia. Railroads -- New South Wales. Railroads -- New South Wales -- Blue Mountains. Blue Mountains (N.S.W.)", "itemID": "trove.nla.gov.au/work/9531118", "language": "English", "libraryCatalog": "Trove", "publisher": "[Bulli, N.S.W. : Zig Zag Press", "url": "https://trove.nla.gov.au/version/14744047", "attachments": [], "tags": [], "notes": [ { "note": "

Cover title

" } ], "seeAlso": [] } ] } ] /** END TEST CASES **/