Skip to content

Commit

Permalink
Merge pull request #160 from pantheon-systems/PCC-842-create-script-w…
Browse files Browse the repository at this point in the history
…hich-transforms-drupal-json-api-export-to-pcc-powered-google-documents

PCC-842, PCC-814, PCC-824 Added: Drupal import command.
  • Loading branch information
kevinstubbs authored Jan 8, 2024
2 parents b120e5c + f2dba22 commit d3a5d67
Show file tree
Hide file tree
Showing 8 changed files with 5,273 additions and 6,852 deletions.
22 changes: 22 additions & 0 deletions packages/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,25 @@ $ pcc whoami
$ pcc logout

```

## Import existing content from a Drupal site

You must ensure that the JSON API for your Drupal site is enabled (which it
should be by default).
https://www.drupal.org/docs/core-modules-and-themes/core-modules/jsonapi-module/api-overview

Once you've confirmed that it's working, determine the URL that PCC can use to
fetch the first results page of posts (e.g.
https://example.com/jsonapi/node/article). Note that the exact URL depends on
which resource type(s) you want to import.

The second and final piece of information you need before importing is the id
of the PCC site that the posts should be imported into. Posts will NOT be
published automatically after the import, but they will be automatically
connected to the site id provided.

With this information, you can now run the import command.

```bash
$ pcc import drupal https://example.com/jsonapi/node/article siteid12345
```
6 changes: 5 additions & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,27 @@
"dependencies": {
"@pantheon-systems/pcc-sdk-core": "latest",
"axios": "^1.6.0",
"bluebird": "^3.7.2",
"boxen": "^7.1.1",
"chalk": "^5.3.0",
"dayjs": "^1.11.9",
"fs-extra": "^11.1.1",
"get-port": "^7.0.0",
"google-auth-library": "^8.9.0",
"google-auth-library": "^9.4.0",
"googleapis": "^129.0.0",
"inquirer": "^8.2.6",
"nunjucks": "^3.2.4",
"octokit": "^3.1.2",
"open": "^9.1.0",
"ora": "^6.3.1",
"package-json": "^8.1.1",
"query-string": "^8.1.0",
"server-destroy": "^1.0.1",
"yargs": "^17.7.2"
},
"devDependencies": {
"@babel/preset-env": "7.21.5",
"@types/bluebird": "^3.5.42",
"@types/fs-extra": "^11.0.1",
"@types/inquirer": "^9.0.3",
"@types/jest": "29.5.1",
Expand Down
237 changes: 237 additions & 0 deletions packages/cli/src/cli/commands/import.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
import { randomUUID } from "crypto";
import { exit } from "process";
import axios, { AxiosError } from "axios";
import Promise from "bluebird";
import chalk from "chalk";
import type { GaxiosResponse } from "gaxios";
import { OAuth2Client } from "google-auth-library";
import { drive_v3, google } from "googleapis";
import queryString from "query-string";
import AddOnApiHelper from "../../lib/addonApiHelper";
import { getLocalAuthDetails } from "../../lib/localStorage";
import { Logger } from "../../lib/logger";
import { errorHandler } from "../exceptions";
import login from "./login";

/** Arguments accepted by the Drupal import command (`importFromDrupal`). */
type ImportParams = {
// URL of the first Drupal JSON:API results page; checked with `new URL` before use.
baseUrl: string;
// Id of the PCC site that the imported documents are attached to.
siteId: string;
// Forwarded unchanged to `AddOnApiHelper.updateDocument`.
verbose: boolean;
};

/**
 * One resource from the `data` array of a Drupal JSON:API collection
 * response, limited to the fields the importer reads.
 */
interface DrupalPost {
  /** Resource id; stored as the `drupalId` metadata value of the imported document. */
  id: string;
  attributes?: {
    /** Posts without a body are skipped by the importer. */
    body?: {
      /** Rendered HTML that is uploaded as the Google Doc content. */
      processed: string;
    };
    /** Used as the name of the created Google Doc and as the document title. */
    title: string;
  };
  relationships: {
    /** Points at the author resource, which is looked up in the `included` data. */
    field_author: {
      data: {
        id: string;
      };
    };
    /** Topic references, resolved to names through the `included` data. */
    field_topics?: {
      // An array rather than a one-element tuple: a post may reference zero,
      // one, or many topics.
      data: Array<{
        id: string;
      }>;
    };
  };
}

/**
 * A topic reference as it appears in a post's `field_topics` relationship;
 * used as the callback parameter type when topic ids are mapped to names.
 */
interface DrupalTopic {
// Matched against the `id` of entries in the collected `included` data.
id: string;
attributes?: {
name: string;
};
}

/**
 * An entry from the `included` array of a Drupal JSON:API response.
 *
 * The importer requests both `field_author` and `field_topics`, so the entries
 * are heterogeneous: `title` is read from the author entry and `name` from
 * topic entries. Neither is guaranteed on every entry, hence both are optional.
 */
interface DrupalIncludedData {
  /** Resource id that relationship references are matched against. */
  id: string;
  attributes?: {
    /** Read for topic entries. */
    name?: string;
    /** Read for the author entry. */
    title?: string;
  };
}

/**
 * Fetches a single page of a Drupal JSON:API collection.
 *
 * @param url Address of the page to request.
 * @returns The link to the following page (if the response has one), the
 *   page's `data` entries, and its `included` entries.
 * @throws Re-throws whatever the request or response handling raised, after
 *   printing it to stderr.
 */
async function getDrupalPosts(url: string) {
  try {
    console.log(`Importing from ${url}`);

    const response = await axios.get(url);
    const payload = response.data;

    // Read the pagination link first, then the page contents.
    const nextURL = payload.links?.next?.href;
    const posts = payload.data;
    const includedData = payload.included;

    return { nextURL, posts, includedData };
  } catch (requestError) {
    console.error(requestError);
    throw requestError;
  }
}

/**
 * Imports posts from a Drupal JSON:API collection into Google Docs and
 * connects every created document to a PCC site.
 *
 * Flow:
 *  1. Validate `baseUrl` and log in with the extra Drive scope.
 *  2. Create a uniquely named Drive folder that receives the documents.
 *  3. Walk the JSON:API pagination links, collecting posts and included data.
 *  4. Make sure the `drupalId` and `author` metadata fields exist on the site.
 *  5. Create one Google Doc per post (posts without a body are skipped) and
 *     attach it to the site together with its topics and metadata.
 *
 * The process exits with code 1 when the URL is invalid, no login details are
 * available, or the parent folder cannot be created. A failure while importing
 * an individual post is logged and re-thrown, which aborts the import.
 *
 * @param baseUrl URL of the first JSON:API results page.
 * @param siteId  Id of the PCC site the documents are attached to.
 * @param verbose Forwarded to `AddOnApiHelper.updateDocument`.
 */
export const importFromDrupal = errorHandler<ImportParams>(
  async ({ baseUrl, siteId, verbose }: ImportParams) => {
    // Upper bound on followed pagination links, guarding against endless loops.
    const MAX_PAGES = 1000;
    // Number of posts processed in parallel.
    const IMPORT_CONCURRENCY = 20;

    const logger = new Logger();

    // Validate unconditionally: the URL is required further down, so an empty
    // value must fail here instead of after the login round trip.
    try {
      new URL(baseUrl);
    } catch (_err) {
      logger.error(
        chalk.red(
          `ERROR: Value provided for \`baseUrl\` is not a valid URL. `,
        ),
      );
      exit(1);
    }

    await login(["https://www.googleapis.com/auth/drive.file"]);
    const authDetails = await getLocalAuthDetails();

    if (!authDetails) {
      logger.error(chalk.red(`ERROR: Failed to retrieve login details. `));
      exit(1);
    }

    const oauth2Client = new OAuth2Client();
    oauth2Client.setCredentials(authDetails);
    const drive = google.drive({
      version: "v3",
      auth: oauth2Client,
    });

    // Create the parent folder. A failed request is logged and leaves `folder`
    // undefined so that the check below reports it, instead of crashing with a
    // TypeError on a missing response object.
    let folder: drive_v3.Schema$File | undefined;
    try {
      const folderRes = (await drive.files.create({
        fields: "id,name",
        requestBody: {
          name: `PCC Import from Drupal on ${new Date().toLocaleDateString()} unique id: ${randomUUID()}`,
          mimeType: "application/vnd.google-apps.folder",
        },
      })) as GaxiosResponse<drive_v3.Schema$File>;
      folder = folderRes.data;
    } catch (e) {
      console.error(e);
    }

    const folderId = folder?.id;
    const folderName = folder?.name;

    if (folderId == null) {
      logger.error(
        chalk.red(
          `Failed to create parent folder which we would have imported posts into`,
        ),
      );
      exit(1);
    }

    // Get results.
    const { url, query } = queryString.parseUrl(baseUrl);
    query.include = "field_author,field_topics";
    const allPosts: DrupalPost[] = [];
    const allIncludedData: DrupalIncludedData[] = [];
    let nextURL: string | undefined = queryString.stringifyUrl({ url, query });
    // Counts requests actually made, so the summary is right even when the
    // loop ends because there is no further page.
    let pagesFetched = 0;

    while (nextURL != null && pagesFetched < MAX_PAGES) {
      const drupalData = await getDrupalPosts(nextURL);
      pagesFetched++;
      nextURL = drupalData.nextURL;

      if (drupalData.posts?.length) {
        allPosts.push(...drupalData.posts);
      }

      if (drupalData.includedData?.length) {
        allIncludedData.push(...drupalData.includedData);
      }
    }

    if (nextURL != null) {
      // Make the truncation visible instead of silently dropping pages.
      logger.log(
        chalk.yellow(
          `Stopped after ${MAX_PAGES} pages; remaining pages were not imported`,
        ),
      );
    }

    logger.log(
      chalk.green(
        `Retrieved ${allPosts.length} posts after ${pagesFetched} pages`,
      ),
    );

    // Index included resources by id. The first occurrence wins, which matches
    // a linear `find` over the collected list.
    const includedById = new Map<string, DrupalIncludedData>();
    for (const item of allIncludedData) {
      if (!includedById.has(item.id)) {
        includedById.set(item.id, item);
      }
    }

    // Ensure that these metadata fields exist.
    await AddOnApiHelper.addSiteMetadataField(
      siteId,
      "blog",
      "drupalId",
      "string",
    );
    await AddOnApiHelper.addSiteMetadataField(
      siteId,
      "blog",
      "author",
      "string",
    );

    let importedCount = 0;
    let skippedCount = 0;

    await Promise.map(
      allPosts,
      async (post) => {
        const attributes = post?.attributes;
        const body = attributes?.body;

        if (attributes == null || body == null) {
          console.log("Skipping post", Object.keys(post ?? {}));
          skippedCount++;
          return;
        }

        // Tolerate posts without an author relationship: the author metadata
        // is then simply left undefined.
        const authorId = post.relationships?.field_author?.data?.id;
        const authorName: string | undefined =
          authorId != null
            ? includedById.get(authorId)?.attributes?.title
            : undefined;

        // Resolve topic references to their names, dropping unknown topics.
        const topicRefs: ReadonlyArray<{ id: string }> =
          post.relationships?.field_topics?.data ?? [];
        const topicNames = topicRefs
          .map((topic) => includedById.get(topic.id)?.attributes?.name)
          .filter((x: string | undefined): x is string => x != null);

        // Create the google doc.
        const res = (await drive.files.create({
          requestBody: {
            // Name from the article.
            name: attributes.title,
            mimeType: "application/vnd.google-apps.document",
            parents: [folderId],
          },
          media: {
            mimeType: "text/html",
            body: body.processed,
          },
        })) as GaxiosResponse<drive_v3.Schema$File>;
        const fileId = res.data.id;

        if (!fileId) {
          throw new Error(`Failed to create file for ${attributes.title}`);
        }

        // Add it to the PCC site.
        await AddOnApiHelper.getDocument(fileId, true);

        try {
          await AddOnApiHelper.updateDocument(
            fileId,
            siteId,
            attributes.title,
            topicNames,
            {
              author: authorName,
              drupalId: post.id,
            },
            verbose,
          );
        } catch (e) {
          console.error(e instanceof AxiosError ? e.response?.data : e);
          throw e;
        }

        importedCount++;
      },
      {
        concurrency: IMPORT_CONCURRENCY,
      },
    );

    if (skippedCount > 0) {
      logger.log(
        chalk.yellow(`Skipped ${skippedCount} posts that have no body`),
      );
    }

    // Report the documents actually created, not the number of posts fetched.
    logger.log(
      chalk.green(
        `Successfully imported ${importedCount} documents into ${folderName}`,
      ),
    );
  },
);
21 changes: 15 additions & 6 deletions packages/cli/src/cli/commands/login.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,26 @@ nunjucks.configure({ autoescape: true });

const OAUTH_SCOPES = ["https://www.googleapis.com/auth/userinfo.email"];

function login(): Promise<void> {
function login(extraScopes: string[]): Promise<void> {
return new Promise(
// eslint-disable-next-line no-async-promise-executor -- Handling promise rejection in the executor
async (resolve, reject) => {
const spinner = ora("Logging you in...").start();
try {
const authData = await getLocalAuthDetails();
if (authData) {
const jwtPayload = parseJwt(authData.id_token as string);
spinner.succeed(`You are already logged in as ${jwtPayload.email}.`);
return;
const scopes = authData.scope?.split(" ");

if (
!extraScopes?.length ||
extraScopes.find((x) => scopes?.includes(x))
) {
const jwtPayload = parseJwt(authData.id_token as string);
spinner.succeed(
`You are already logged in as ${jwtPayload.email}.`,
);
return resolve();
}
}

const oAuth2Client = new OAuth2Client({
Expand All @@ -41,7 +50,7 @@ function login(): Promise<void> {
// Generate the url that will be used for the consent dialog.
const authorizeUrl = oAuth2Client.generateAuthUrl({
access_type: "offline",
scope: OAUTH_SCOPES,
scope: [...OAUTH_SCOPES, ...extraScopes],
});

const server = http.createServer(async (req, res) => {
Expand Down Expand Up @@ -92,7 +101,7 @@ function login(): Promise<void> {
},
);
}
export default errorHandler<void>(login);
export default errorHandler<string[]>(login);
// Example invocations of the login command, each a description plus a command line.
// NOTE(review): "$0" looks like a placeholder for the CLI binary name — confirm against the yargs setup.
export const LOGIN_EXAMPLES = [
{ description: "Login the user", command: "$0 login" },
];
Loading

0 comments on commit d3a5d67

Please sign in to comment.