523 lines
15 KiB
JavaScript
523 lines
15 KiB
JavaScript
|
{
|
||
|
"translatorID": "50a4cf3f-92ef-4e9f-ab15-815229159b16",
|
||
|
"label": "National Archives of Australia",
|
||
|
"creator": "Tim Sherratt, Aurimas Vinckevicius",
|
||
|
"target": "^https?://recordsearch\\.naa\\.gov\\.au/",
|
||
|
"minVersion": "3.0",
|
||
|
"maxVersion": "",
|
||
|
"priority": 100,
|
||
|
"inRepository": true,
|
||
|
"translatorType": 4,
|
||
|
"browserSupport": "gcsibv",
|
||
|
"lastUpdated": "2021-02-13 11:39:57"
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
National Archives of Australia Translator
|
||
|
Copyright (C) 2011 Tim Sherratt (tim@discontents.com.au, @wragge)
|
||
|
|
||
|
This program is free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU Affero General Public License as published by
|
||
|
the Free Software Foundation, either version 3 of the License, or
|
||
|
(at your option) any later version.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU Affero General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Affero General Public License
|
||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
|
||
|
var multiplesRE = /\/(SeriesListing|ItemsListing|PhotoSearchSearchResults|PhotoListing)\.asp/i;
|
||
|
var singleItemRE = /\/(SeriesDetail|ItemDetail|PhotoSearchItemDetail|ViewImage|PhotoDetail)\.asp/i;
|
||
|
function detectWeb(doc, url) {
|
||
|
// RecordSearch - items and series - or Photosearch results
|
||
|
if (multiplesRE.test(url)) {
|
||
|
return getSearchResults(doc, url, true) ? "multiple" : false;
|
||
|
}
|
||
|
else if (singleItemRE.test(url)) {
|
||
|
return "manuscript";
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
function doWeb(doc, url) {
|
||
|
if (detectWeb(doc, url) == "multiple") {
|
||
|
Zotero.selectItems(getSearchResults(doc, url), function (items) {
|
||
|
if (!items) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
var urls = [];
|
||
|
for (var i in items) {
|
||
|
urls.push(i);
|
||
|
}
|
||
|
ZU.processDocuments(urls, scrape);
|
||
|
});
|
||
|
}
|
||
|
else {
|
||
|
scrape(doc, url);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
function getSearchResults(doc, url, checkOnly) {
|
||
|
var title,
|
||
|
link,
|
||
|
results,
|
||
|
table,
|
||
|
items = {},
|
||
|
found = false;
|
||
|
var m = url.match(multiplesRE);
|
||
|
if (!m) return false;
|
||
|
|
||
|
switch (m[1].toLowerCase()) {
|
||
|
case 'serieslisting':
|
||
|
table = doc.getElementsByClassName('SearchResults')[0];
|
||
|
if (!table) return false;
|
||
|
|
||
|
results = ZU.xpath(doc, '//table[@class="SearchResults"]//tr[@class!="header"]');
|
||
|
for (let i = 0; i < results.length; i++) {
|
||
|
title = results[i].getElementsByTagName('td')[2];
|
||
|
if (!title) continue;
|
||
|
link = getCleanLinkFromCell(title);
|
||
|
if (!link) continue;
|
||
|
|
||
|
if (checkOnly) return true;
|
||
|
found = true;
|
||
|
items[link] = ZU.trimInternal(title.textContent);
|
||
|
}
|
||
|
break;
|
||
|
case 'itemslisting':
|
||
|
table = doc.getElementsByClassName('SearchResults')[0];
|
||
|
if (!table) return false;
|
||
|
|
||
|
results = ZU.xpath(doc, '//table[@class="SearchResults"]//tr[@class!="header"]');
|
||
|
for (let i = 0; i < results.length; i++) {
|
||
|
title = results[i].getElementsByTagName('td')[3];
|
||
|
if (!title) continue;
|
||
|
link = getCleanLinkFromCell(title);
|
||
|
if (!link) continue;
|
||
|
|
||
|
if (checkOnly) return true;
|
||
|
found = true;
|
||
|
items[link] = ZU.trimInternal(title.textContent);
|
||
|
}
|
||
|
break;
|
||
|
case 'photolisting':
|
||
|
// Try the list view first
|
||
|
results = ZU.xpath(doc, '//table[contains(@id, "PhotoResults")]//table[@class="greyboxdetail"]');
|
||
|
var view = 'list';
|
||
|
// If no results try grid view
|
||
|
if (!results.length) {
|
||
|
results = ZU.xpath(doc, '//table[contains(@id, "PhotoResults")]/tbody/tr/td[@title]');
|
||
|
view = 'grid';
|
||
|
}
|
||
|
for (let i = 0; i < results.length; i++) {
|
||
|
if (view == 'list') {
|
||
|
title = results[i].getElementsByTagName('td')[1];
|
||
|
if (!title) continue;
|
||
|
link = getCleanLinkFromCell(title);
|
||
|
if (!link) continue;
|
||
|
title = title.textContent;
|
||
|
}
|
||
|
else {
|
||
|
title = results[i].getAttribute('title');
|
||
|
if (!title) continue;
|
||
|
link = results[i].getElementsByTagName('a')[0];
|
||
|
if (!link) continue;
|
||
|
link = link.href;
|
||
|
}
|
||
|
|
||
|
if (checkOnly) return true;
|
||
|
found = true;
|
||
|
items[link] = ZU.trimInternal(title);
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
return found ? items : false;
|
||
|
}
|
||
|
|
||
|
function getHost(url) {
|
||
|
return url.match(/^https?:\/\/[^/]+/)[0];
|
||
|
}
|
||
|
|
||
|
function scrape(doc, url) {
|
||
|
var m = url.match(singleItemRE);
|
||
|
if (!m) return;
|
||
|
|
||
|
var item;
|
||
|
switch (m[1].toLowerCase()) {
|
||
|
case 'viewimage':
|
||
|
item = scrapeImage(doc, url);
|
||
|
break;
|
||
|
case 'photosearchitemdetail':
|
||
|
item = scrapePhoto(doc, url);
|
||
|
break;
|
||
|
case 'photodetail':
|
||
|
item = scrapePhoto(doc, url);
|
||
|
break;
|
||
|
case 'seriesdetail':
|
||
|
item = scrapeSeries(doc, url);
|
||
|
break;
|
||
|
case 'itemdetail':
|
||
|
item = scrapeItem(doc, url);
|
||
|
break;
|
||
|
default:
|
||
|
throw new Error("Unknown page type: " + m[1]);
|
||
|
}
|
||
|
if (item) {
|
||
|
item.archive = item.libraryCatalog = "National Archives of Australia";
|
||
|
item.complete();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
function createPersistentLink(id, linkType) {
|
||
|
// Create persistent (as possible) links into RS
|
||
|
return 'https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=' + linkType + '&Number=' + id;
|
||
|
}
|
||
|
|
||
|
function stripSeries(series) {
|
||
|
// Return cleaned contents of series cells, removing the extra notice
|
||
|
return series.substr(0, series.search(/(Click|All)/));
|
||
|
}
|
||
|
|
||
|
function getCleanLinkFromCell(cell) {
|
||
|
// Get a url from a cell that has an onclick attribute.
|
||
|
var link = cell.getAttribute('onclick');
|
||
|
link = link.substring(link.indexOf("'"));
|
||
|
return 'https://recordsearch.naa.gov.au/SearchNRetrieve/Interface/' + ZU.superCleanString(link);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Series/Item scraping
|
||
|
*/
|
||
|
|
||
|
function parseItemTable(table) {
|
||
|
var meta = {},
|
||
|
rows = table.getElementsByTagName('tr');
|
||
|
for (let i = 0; i < rows.length; i++) {
|
||
|
var td = rows[i].getElementsByTagName('td');
|
||
|
if (td.length != 2) continue;
|
||
|
|
||
|
var label = ZU.trimInternal(td[0].textContent).toLowerCase();
|
||
|
|
||
|
var data;
|
||
|
if (label == 'series note') {
|
||
|
// grab the full note, instead of the truncation
|
||
|
var notes = table.ownerDocument.getElementById('notes');
|
||
|
if (notes && notes.children.length == 2
|
||
|
&& (notes = notes.getElementsByTagName('pre')[0])
|
||
|
) {
|
||
|
data = notes.textContent;
|
||
|
}
|
||
|
else {
|
||
|
data = ZU.trimInternal(td[1].textContent);
|
||
|
}
|
||
|
}
|
||
|
else if (label == 'related searches') {
|
||
|
var childrens = td[1].getElementsByTagName('a');
|
||
|
data = [];
|
||
|
for (let j = 0; j < childrens.length; j++) {
|
||
|
data.push(childrens[i].textContent.trim());
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
data = ZU.trimInternal(td[1].textContent);
|
||
|
}
|
||
|
if (!label || !data) continue;
|
||
|
meta[label] = data;
|
||
|
}
|
||
|
|
||
|
return meta;
|
||
|
}
|
||
|
|
||
|
function scrapeItem(doc) {
|
||
|
var meta = parseItemTable(ZU.xpath(doc, '//div[@class="detailsTable"]//tbody')[0]);
|
||
|
if (!meta) return null;
|
||
|
|
||
|
var item = new Zotero.Item('manuscript');
|
||
|
item.title = meta.title;
|
||
|
item.type = 'item';
|
||
|
item.date = meta['contents date range'];
|
||
|
item.place = meta.location;
|
||
|
var series = stripSeries(meta['series number']);
|
||
|
var control = meta['control symbol'];
|
||
|
item.archiveLocation = series + ', ' + control;
|
||
|
item['access status'] = meta['access status'];
|
||
|
item['access decision'] = meta['date of decision'];
|
||
|
var barcode = encodeURIComponent(meta['item id']);
|
||
|
item.url = createPersistentLink(barcode, 'I');
|
||
|
|
||
|
if (meta['item notes']) {
|
||
|
item.notes.push(meta['item notes']);
|
||
|
}
|
||
|
|
||
|
// Add link to digital copy if available
|
||
|
if (ZU.xpath(doc, '//div[contains(@id, "_pnlDigitalCopy")]/a[contains(normalize-space(text()), "View digital copy")]').length) {
|
||
|
// item.attachments.push({
|
||
|
// title: "Digital copy at National Archives of Australia",
|
||
|
// url: 'https://recordsearch.naa.gov.au/SearchNRetrieve/Interface/ViewImage.aspx?B=' + barcode,
|
||
|
// mimeType: 'text/html',
|
||
|
// snapshot: false
|
||
|
// });
|
||
|
item.attachments.push({
|
||
|
title: 'National Archives of Australia item PDF',
|
||
|
url: 'https://recordsearch.naa.gov.au/SearchNRetrieve/NAAMedia/ViewPDF.aspx?B=' + barcode + '&D=D',
|
||
|
mimeType: 'application/pdf'
|
||
|
});
|
||
|
}
|
||
|
|
||
|
return item;
|
||
|
}
|
||
|
|
||
|
function scrapeSeries(doc) {
|
||
|
var meta = parseItemTable(ZU.xpath(doc, '//div[@class="detailsTable"]//tbody')[0]);
|
||
|
if (!meta) return null;
|
||
|
|
||
|
var item = new Zotero.Item('manuscript');
|
||
|
item.title = meta.title;
|
||
|
item.type = 'series';
|
||
|
item.date = meta['contents dates'];
|
||
|
// Split multiple holdings with semi-colon
|
||
|
if (meta['quantity and location']) {
|
||
|
item.place = meta['quantity and location'].replace(/([A-Z]{1})([0-9]{1})/g, '$1; $2');
|
||
|
}
|
||
|
item.format = meta['predominant physical format'];
|
||
|
item.abstractNote = meta['series note'];
|
||
|
item.archiveLocation = meta['series number'];
|
||
|
item['number of items'] = stripSeries(meta['items in this series on recordsearch']);
|
||
|
var seriesNumber = encodeURIComponent(meta['series number']);
|
||
|
item.url = createPersistentLink(seriesNumber, 'S');
|
||
|
|
||
|
// Agencies recording into this series
|
||
|
var agencies = ZU.xpath(doc, '//div[@id="provenanceRecording"]//div[@class="linkagesInfo"]');
|
||
|
for (let i = 0; i < agencies.length; i++) {
|
||
|
item.creators.push({
|
||
|
lastName: ZU.trimInternal(agencies[i].textContent),
|
||
|
creatorType: "author",
|
||
|
fieldMode: 1
|
||
|
});
|
||
|
}
|
||
|
|
||
|
return item;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* ViewImage
|
||
|
*/
|
||
|
|
||
|
function getImageField(doc, label) {
|
||
|
label = 'lbl' + label;
|
||
|
var data = doc.getElementById(label);
|
||
|
if (!data) return '';
|
||
|
|
||
|
return ZU.trimInternal(data.textContent);
|
||
|
}
|
||
|
|
||
|
function scrapeImage(doc, url) {
|
||
|
var image = doc.getElementById('divImage'),
|
||
|
singleView = image && image.offsetParent; // check if visble
|
||
|
|
||
|
var total = doc.getElementsByName('hTotalPages')[0],
|
||
|
page = doc.getElementsByName('hCurrentPage')[0];
|
||
|
page = page && Number.parseInt(page.value);
|
||
|
total = total && Number.parseInt(total.value);
|
||
|
|
||
|
var item = new Zotero.Item('manuscript');
|
||
|
|
||
|
item.title = getImageField(doc, 'Title');
|
||
|
if (singleView && page && total != 1) {
|
||
|
item.title += ' [' + page + (total ? ' of ' + total : '') + ']';
|
||
|
}
|
||
|
|
||
|
item.date = getImageField(doc, 'ContentsDate');
|
||
|
item.archiveLocation = getImageField(doc, 'Series') + ', ' + getImageField(doc, 'ControlSymbol');
|
||
|
|
||
|
var barcode = getImageField(doc, 'Barcode');
|
||
|
item.url = getHost(url) + '/SearchNRetrieve/Interface/ViewImage.aspx?'
|
||
|
+ 'B=' + encodeURIComponent(barcode)
|
||
|
+ (singleView ? '&S=' + page : '');
|
||
|
|
||
|
var imageUrlBase = '/SearchNRetrieve/NAAMedia/ShowImage.aspx?T=P&B=' + encodeURIComponent(barcode);
|
||
|
// In single view, save current image. In multiples view, save all
|
||
|
// (unless more than 10, then don't save at all)
|
||
|
if ((singleView && page) || (!singleView && total && total < 11)) {
|
||
|
var from = singleView ? page - 1 : 0, to = singleView ? page : total;
|
||
|
// var includeCount = total != 1;
|
||
|
|
||
|
for (let i = from; i < to; i++) {
|
||
|
item.attachments.push({
|
||
|
title: 'Folio'
|
||
|
+ (total != 1
|
||
|
? ' ' + (i + 1) + (total ? ' of ' + total : '')
|
||
|
: '')
|
||
|
+ ' [' + item.archiveLocation + ']',
|
||
|
url: imageUrlBase + '&S=' + page,
|
||
|
mimeType: 'image/jpeg'
|
||
|
});
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return item;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* PhotoSearch
|
||
|
*/
|
||
|
|
||
|
function scrapePhoto(doc) {
|
||
|
var table = ZU.xpath(doc, '//table[contains(@id, "PhotoDetailTable")]//table[contains(@id, "GreyBoxTable")]/tbody')[0];
|
||
|
if (!table) return null;
|
||
|
|
||
|
var meta = parseItemTable(table);
|
||
|
var item = new Zotero.Item('manuscript');
|
||
|
item.title = meta.title;
|
||
|
item.type = 'photograph';
|
||
|
item.date = meta.date || meta['date range'];
|
||
|
item.place = meta.location || meta['item location'];
|
||
|
item.archiveLocation = meta['series/control symbol'];
|
||
|
item.url = createPersistentLink(encodeURIComponent(meta['item id']), 'PSI');
|
||
|
|
||
|
if (meta['related searches']) {
|
||
|
item.tags = meta['related searches'];
|
||
|
}
|
||
|
|
||
|
var imageurl = ZU.xpathText(doc, '//table[contains(@id, "PhotoDetailTable")]//img/@src');
|
||
|
if (imageurl) {
|
||
|
imageurl = imageurl.replace(/([?&])T=[^&]*(?:&|$)/g, '$1') + '&T=P'; // T=P better quality
|
||
|
item.attachments.push({
|
||
|
title: 'Digital image of NAA: ' + item.archiveLocation,
|
||
|
url: imageurl,
|
||
|
mimeType: 'image/jpeg' // Seems like that is generally the case
|
||
|
});
|
||
|
}
|
||
|
|
||
|
return item;
|
||
|
}
|
||
|
|
||
|
/** BEGIN TEST CASES **/
|
||
|
var testCases = [
|
||
|
{
|
||
|
"type": "web",
|
||
|
"url": "https://recordsearch.naa.gov.au/SearchNRetrieve/Interface/ViewImage.aspx?B=12048&S=4",
|
||
|
"defer": true,
|
||
|
"items": [
|
||
|
{
|
||
|
"itemType": "manuscript",
|
||
|
"title": "Carl Gustav Opitz - Naturalization [4 of 7]",
|
||
|
"creators": [],
|
||
|
"date": "1911 - 1912",
|
||
|
"archive": "National Archives of Australia",
|
||
|
"archiveLocation": "A1, 1911/18393",
|
||
|
"libraryCatalog": "National Archives of Australia",
|
||
|
"url": "https://recordsearch.naa.gov.au/SearchNRetrieve/Interface/ViewImage.aspx?B=12048&S=4",
|
||
|
"attachments": [
|
||
|
{
|
||
|
"title": "Folio 4 of 7 [A1, 1911/18393]",
|
||
|
"mimeType": "image/jpeg"
|
||
|
}
|
||
|
],
|
||
|
"tags": [],
|
||
|
"notes": [],
|
||
|
"seeAlso": []
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"type": "web",
|
||
|
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=PSI&Number=11404473",
|
||
|
"defer": true,
|
||
|
"items": [
|
||
|
{
|
||
|
"itemType": "manuscript",
|
||
|
"title": "Antarctica - A Gentoo penguin on Heard Island guards its nesting mate [photographic image]. 1 photographic negative: b&w, acetate",
|
||
|
"creators": [],
|
||
|
"date": "1949 - 1949",
|
||
|
"archive": "National Archives of Australia",
|
||
|
"archiveLocation": "A1200, L11912",
|
||
|
"libraryCatalog": "National Archives of Australia",
|
||
|
"manuscriptType": "photograph",
|
||
|
"place": "Canberra",
|
||
|
"shortTitle": "Antarctica - A Gentoo penguin on Heard Island guards its nesting mate [photographic image]. 1 photographic negative",
|
||
|
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=PSI&Number=11404473",
|
||
|
"attachments": [
|
||
|
{
|
||
|
"title": "Digital image of NAA: A1200, L11912",
|
||
|
"mimeType": "image/jpeg"
|
||
|
}
|
||
|
],
|
||
|
"tags": [],
|
||
|
"notes": [],
|
||
|
"seeAlso": []
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"type": "web",
|
||
|
"defer": true,
|
||
|
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=I&Number=149309",
|
||
|
"items": [
|
||
|
{
|
||
|
"itemType": "manuscript",
|
||
|
"title": "The Wragge Estate. Property for sale.",
|
||
|
"creators": [],
|
||
|
"date": "1917 - 1917",
|
||
|
"archive": "National Archives of Australia",
|
||
|
"archiveLocation": "A2479, 17/1306",
|
||
|
"libraryCatalog": "National Archives of Australia",
|
||
|
"manuscriptType": "item",
|
||
|
"place": "Canberra",
|
||
|
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=I&Number=149309",
|
||
|
"attachments": [
|
||
|
{
|
||
|
"title": "National Archives of Australia item PDF",
|
||
|
"mimeType": "application/pdf"
|
||
|
}
|
||
|
],
|
||
|
"tags": [],
|
||
|
"notes": [],
|
||
|
"seeAlso": []
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"type": "web",
|
||
|
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=S&Number=A2",
|
||
|
"defer": true,
|
||
|
"items": [
|
||
|
{
|
||
|
"itemType": "manuscript",
|
||
|
"title": "Correspondence files, annual single number series",
|
||
|
"creators": [
|
||
|
{
|
||
|
"lastName": "CA 588, Prime Minister's Office",
|
||
|
"creatorType": "author",
|
||
|
"fieldMode": 1
|
||
|
},
|
||
|
{
|
||
|
"lastName": "CA 12, Prime Minister's Department",
|
||
|
"creatorType": "author",
|
||
|
"fieldMode": 1
|
||
|
}
|
||
|
],
|
||
|
"date": "01 Jan 1895 - 31 Dec 1926",
|
||
|
"abstractNote": "Name Index Volumes, 1917-1951 (ex CP 602/1, AS12/8)",
|
||
|
"archive": "National Archives of Australia",
|
||
|
"archiveLocation": "A2",
|
||
|
"libraryCatalog": "National Archives of Australia",
|
||
|
"manuscriptType": "series",
|
||
|
"place": "35.74 metres held in ACT",
|
||
|
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=S&Number=A2",
|
||
|
"attachments": [],
|
||
|
"tags": [],
|
||
|
"notes": [],
|
||
|
"seeAlso": []
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
/** END TEST CASES **/
|