Skip to content

Commit

Permalink
Merge branch 'master' into yk/add-missing-licenses
Browse files Browse the repository at this point in the history
  • Loading branch information
qtomlinson authored Jan 23, 2025
2 parents 40f99c6 + e25e2a5 commit 54e1631
Show file tree
Hide file tree
Showing 14 changed files with 160 additions and 32 deletions.
6 changes: 5 additions & 1 deletion lib/fetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

const axios = require('axios')

const defaultHeaders = Object.freeze({ 'User-Agent': 'clearlydefined.io crawler ([email protected])' })

axios.defaults.headers = defaultHeaders

function buildRequestOptions(request) {
let responseType = 'text'
if (request.json) {
Expand Down Expand Up @@ -48,4 +52,4 @@ function withDefaults(opts) {
return request => callFetch(request, axiosInstance)
}

module.exports = { callFetch, withDefaults }
module.exports = { callFetch, withDefaults, defaultHeaders }
3 changes: 2 additions & 1 deletion lib/github.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
// SPDX-License-Identifier: MIT

const GitHubApi = require('@octokit/rest')
const { defaultHeaders } = require('./fetch')

module.exports = {
getClient: function (options) {
const github = new GitHubApi({ headers: { 'user-agent': 'clearlydefined.io' } })
const github = new GitHubApi({ headers: defaultHeaders })
github.authenticate({ type: 'token', token: options.token })
return github
}
Expand Down
3 changes: 2 additions & 1 deletion lib/gitlab.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
const { Gitlab } = require('@gitbeaker/node')
const { defaultHeaders } = require('./fetch')

module.exports = {
getClient: function (options) {
const gitlab = new Gitlab({
headers: { 'user-agent': 'clearlydefined.io ' },
headers: defaultHeaders,
token: options?.token
})
return gitlab
Expand Down
5 changes: 3 additions & 2 deletions lib/pypiCoordinatesMapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const EntityCoordinates = require('./entityCoordinates')

class PypiCoordinatesMapper {
constructor(fetch = requestPromise) {
this.baseUrl = 'https://pypi.python.org'
this.baseUrl = 'https://pypi.org'
this._fetch = fetch
}

Expand All @@ -23,7 +23,8 @@ class PypiCoordinatesMapper {

async _resolve(coordinates) {
if (coordinates.name === '..') return null
const url = new URL(`/pypi/${coordinates.name}/json`, this.baseUrl).toString()
const encodedName = encodeURIComponent(coordinates.name)
const url = new URL(`/pypi/${encodedName}/json`, this.baseUrl).toString()
try {
const answer = await this._fetch({ url, method: 'GET', json: true })
return answer?.info?.name && { name: answer.info.name }
Expand Down
3 changes: 2 additions & 1 deletion middleware/github.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const crypto = require('crypto')
const GitHubApi = require('@octokit/rest')
const asyncMiddleware = require('./asyncMiddleware')
const Github = require('../lib/github')
const { defaultHeaders } = require('../lib/fetch')

let options = null
let cache = null
Expand Down Expand Up @@ -64,7 +65,7 @@ async function setupUserClient(req, token) {
return null
}
// constructor and authenticate are inexpensive (just sets local state)
const client = new GitHubApi({ headers: { 'user-agent': 'clearlydefined.io' } })
const client = new GitHubApi({ headers: defaultHeaders })
client.authenticate({ type: 'token', token })
req.app.locals.user = { github: { client } }
return client
Expand Down
10 changes: 5 additions & 5 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"ajv": "^6.12.3",
"ajv-errors": "^1.0.0",
"applicationinsights": "^1.5.0",
"axios": "^1.6.0",
"axios": "^1.7.9",
"azure-storage": "^2.10.2",
"base-64": "^0.1.0",
"body-parser": "~1.18.2",
Expand Down
23 changes: 15 additions & 8 deletions providers/summary/clearlydefined.js
Original file line number Diff line number Diff line change
Expand Up @@ -174,13 +174,7 @@ class ClearlyDescribedSummarizer {
return urls
}

addMavenData(result, data, coordinates) {
const urls = this.getMavenUrls(coordinates)

setIfValue(result, 'described.releaseDate', extractDate(data.releaseDate))
setIfValue(result, 'described.urls.registry', urls.registry)
setIfValue(result, 'described.urls.version', `${get(result, 'described.urls.registry')}/${coordinates.revision}`)
setIfValue(result, 'described.urls.download', urls.download)
getDeclaredLicenseMaven(data) {
const projectSummaryLicenses =
get(data, 'manifest.summary.licenses') || get(data, 'manifest.summary.project.licenses') // the project layer was removed in 1.2.0
if (!projectSummaryLicenses) return
Expand All @@ -192,7 +186,18 @@ class ClearlyDescribedSummarizer {
const licenseNames = uniq(flatten(licenseSummaries.map(license => license.name)))
let licenses = licenseUrls.map(extractLicenseFromLicenseUrl).filter(x => x)
if (!licenses.length) licenses = licenseNames.map(x => SPDX.lookupByName(x) || x).filter(x => x)
if (licenses.length) setIfValue(result, 'licensed.declared', SPDX.normalize(licenses.join(' OR ')))
return licenses
}

addMavenData(result, data, coordinates) {
const urls = this.getMavenUrls(coordinates)

setIfValue(result, 'described.releaseDate', extractDate(data.releaseDate))
setIfValue(result, 'described.urls.registry', urls.registry)
setIfValue(result, 'described.urls.version', `${get(result, 'described.urls.registry')}/${coordinates.revision}`)
setIfValue(result, 'described.urls.download', urls.download)
const licenses = this.getDeclaredLicenseMaven(data)
if (licenses?.length) setIfValue(result, 'licensed.declared', SPDX.normalize(licenses.join(' OR ')))
}

addCondaData(result, data, coordinates) {
Expand Down Expand Up @@ -239,6 +244,8 @@ class ClearlyDescribedSummarizer {
'described.urls.download',
`https://repo1.maven.org/maven2/${namespaceAsFolders}/${coordinates.name}/${coordinates.revision}/${coordinates.name}-${coordinates.revision}.jar`
)
const licenses = this.getDeclaredLicenseMaven(data)
if (licenses?.length) setIfValue(result, 'licensed.declared', SPDX.normalize(licenses.join(' OR ')))
}

addNuGetData(result, data, coordinates) {
Expand Down
3 changes: 2 additions & 1 deletion routes/auth.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const passport = require('passport')
const GitHubStrategy = require('passport-github').Strategy
const { URL } = require('url')
const GitHubApi = require('@octokit/rest')
const { defaultHeaders } = require('../lib/fetch')

const router = express.Router()
let options = null
Expand Down Expand Up @@ -80,7 +81,7 @@ router.get('/github/finalize', passportOrPat(), async (req, res) => {
* @returns {Promise<Array<string>>} - list of permission names
*/
async function getUserDetails(token, org) {
const options = { headers: { 'user-agent': 'clearlydefined.io' } }
const options = { headers: defaultHeaders }
const client = new GitHubApi(options)
token && client.authenticate({ type: 'token', token })

Expand Down
2 changes: 1 addition & 1 deletion routes/originCrate.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

const asyncMiddleware = require('../middleware/asyncMiddleware')
const router = require('express').Router()
const requestPromise = require('../lib/fetch').withDefaults({ headers: { 'user-agent': 'clearlydefined.io' } })
const requestPromise = require('../lib/fetch').callFetch
const { uniq } = require('lodash')

// crates.io API https://github.com/rust-lang/crates.io/blob/03666dd7e35d5985504087f7bf0553fa16380fac/src/router.rs
Expand Down
2 changes: 1 addition & 1 deletion routes/originMaven.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

const asyncMiddleware = require('../middleware/asyncMiddleware')
const router = require('express').Router()
const requestPromise = require('../lib/fetch').withDefaults({ headers: { 'user-agent': 'clearlydefined.io' } })
const requestPromise = require('../lib/fetch').callFetch
const { uniq } = require('lodash')

// maven.org API documentation https://search.maven.org/classic/#api
Expand Down
2 changes: 1 addition & 1 deletion routes/originPyPi.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ router.get(
})
)
async function getPypiData(name) {
const url = `https://pypi.python.org/pypi/${encodeURIComponent(name)}/json`
const url = `https://pypi.org/pypi/${encodeURIComponent(name)}/json`
try {
return await requestPromise({ url, method: 'GET', json: true })
} catch (error) {
Expand Down
49 changes: 45 additions & 4 deletions test/lib/fetchTests.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
const { fail } = require('assert')
const { callFetch, withDefaults } = require('../../lib/fetch')
const { callFetch, withDefaults, defaultHeaders } = require('../../lib/fetch')
const { expect } = require('chai')
const fs = require('fs')
const mockttp = require('mockttp')

function checkDefaultHeaders(headers) {
for (const [key, value] of Object.entries(defaultHeaders)) {
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
}
}

describe('CallFetch', () => {
describe('with mock server', () => {
const mockServer = mockttp.getLocal()
Expand All @@ -23,6 +29,40 @@ describe('CallFetch', () => {
expect(response).to.be.deep.equal(JSON.parse(expected))
})

it('checks if all the default headers are present in requests', async () => {
const path = '/search.maven.org/solrsearch/select'
const exactQuery = '?q=g:org.httpcomponents+AND+a:httpcomponents&core=gav&rows=100&wt=json'

const endpointMock = await mockServer.forGet(path).withExactQuery(exactQuery).thenReply(200, 'success')

await callFetch({
url: mockServer.urlFor(path + exactQuery),
method: 'GET',
json: true
})

const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
})

it('checks if all the default headers and other specific header is present in crate component', async () => {
const path = '/crates.io/api/v1/crates/name'
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')

await callFetch({
url: mockServer.urlFor(path),
method: 'GET',
json: true,
encoding: null,
headers: {
Accept: 'text/html'
}
})
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
expect(requests[0].headers).to.include({ accept: 'text/html' })
})

it('checks if the full response is fetched', async () => {
const path = '/registry.npmjs.com/redis/0.1.0'
const expected = fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')
Expand Down Expand Up @@ -87,17 +127,17 @@ describe('CallFetch', () => {
const url = mockServer.urlFor(path)
const endpointMock = await mockServer.forGet(path).thenReply(200)

const defaultOptions = { headers: { 'user-agent': 'clearlydefined.io crawler ([email protected])' } }
const defaultOptions = { headers: defaultHeaders }
const requestWithDefaults = withDefaults(defaultOptions)
await requestWithDefaults({ url })
await requestWithDefaults({ url })

const requests = await endpointMock.getSeenRequests()
expect(requests.length).to.equal(2)
expect(requests[0].url).to.equal(url)
expect(requests[0].headers).to.include(defaultOptions.headers)
checkDefaultHeaders(requests[0].headers)
expect(requests[1].url).to.equal(url)
expect(requests[1].headers).to.include(defaultOptions.headers)
checkDefaultHeaders(requests[1].headers)
})

it('checks if the response is text with uri option in GET request', async () => {
Expand Down Expand Up @@ -129,6 +169,7 @@ describe('CallFetch', () => {
const json = await requests[0].body.getJson()
expect(json).to.deep.equal({ test: 'test' })
expect(requests[0].headers).to.include({ 'x-crawler': 'secret' })
expect(checkDefaultHeaders(requests[0].headers))
})

it('should GET with withCredentials set to false', async function () {
Expand Down
Loading

0 comments on commit 54e1631

Please sign in to comment.