Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PCC-842, PCC-814, PCC-824 Added: Drupal import command. #160

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions packages/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,25 @@ $ pcc whoami
$ pcc logout

```

## Import existing content from a Drupal site

You must ensure that the JSON API for your Drupal site is enabled (which it
should be by default).
https://www.drupal.org/docs/core-modules-and-themes/core-modules/jsonapi-module/api-overview

Once you've ensured that it's working, you will need to determine the URL which
PCC can use to get the initial results page of posts (e.g.
https://example.com/jsonapi/node/article). But please note that the exact URL
will depend on which resource type(s) you want to import.

The second and final piece of information you will need before importing is the
ID of the PCC site that the posts should be imported into. Posts are NOT
published automatically after importing, but they are automatically connected
to the site ID provided.

With this information, you can now run the import command.

```bash
$ pcc import drupal https://example.com/jsonapi/node/article siteid12345
```
6 changes: 5 additions & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,27 @@
"dependencies": {
"@pantheon-systems/pcc-sdk-core": "latest",
"axios": "^1.6.0",
"bluebird": "^3.7.2",
"boxen": "^7.1.1",
"chalk": "^5.3.0",
"dayjs": "^1.11.9",
"fs-extra": "^11.1.1",
"get-port": "^7.0.0",
"google-auth-library": "^8.9.0",
"google-auth-library": "^9.4.0",
"googleapis": "^129.0.0",
"inquirer": "^8.2.6",
"nunjucks": "^3.2.4",
"octokit": "^3.1.2",
"open": "^9.1.0",
"ora": "^6.3.1",
"package-json": "^8.1.1",
"query-string": "^8.1.0",
"server-destroy": "^1.0.1",
"yargs": "^17.7.2"
},
"devDependencies": {
"@babel/preset-env": "7.21.5",
"@types/bluebird": "^3.5.42",
"@types/fs-extra": "^11.0.1",
"@types/inquirer": "^9.0.3",
"@types/jest": "29.5.1",
Expand Down
237 changes: 237 additions & 0 deletions packages/cli/src/cli/commands/import.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
import { randomUUID } from "crypto";
import { exit } from "process";
import axios, { AxiosError } from "axios";
import Promise from "bluebird";
import chalk from "chalk";
import type { GaxiosResponse } from "gaxios";
import { OAuth2Client } from "google-auth-library";
import { drive_v3, google } from "googleapis";
import queryString from "query-string";
import AddOnApiHelper from "../../lib/addonApiHelper";
import { getLocalAuthDetails } from "../../lib/localStorage";
import { Logger } from "../../lib/logger";
import { errorHandler } from "../exceptions";
import login from "./login";

/** Arguments accepted by the Drupal import command. */
type ImportParams = {
// URL of the first results page to fetch; validated with `new URL()` before use.
baseUrl: string;
// Id of the PCC site that the metadata fields and imported documents are attached to.
siteId: string;
// Forwarded unchanged to `AddOnApiHelper.updateDocument`.
verbose: boolean;
};

/**
 * A single post entry as read from the `data` array of a Drupal JSON:API
 * response.
 *
 * Only the fields the importer reads are modelled here.
 */
interface DrupalPost {
  /** Drupal id of the post; stored in the `drupalId` metadata field on import. */
  id: string;
  /** Posts whose `attributes.body` is missing are skipped by the importer. */
  attributes?: {
    body?: {
      /** Body markup, uploaded to Google Drive as `text/html`. */
      processed: string;
    };
    title: string;
  };
  relationships: {
    /** Reference to the author entity, resolved by id against the included data. */
    field_author: {
      data: {
        id: string;
      };
    };
    /**
     * References to topic entities. Declared as an array (not a one-element
     * tuple) because a post may reference zero, one or many topics.
     */
    field_topics?: {
      data: { id: string }[];
    };
  };
}

/**
 * Shape used for the topic references found in a post's `field_topics`
 * relationship; the importer matches them by `id` against the included data.
 */
interface DrupalTopic {
id: string;
attributes?: {
name: string;
};
}

/**
 * An entry from the `included` array of a Drupal JSON:API response.
 * The importer looks entries up by `id` to resolve authors and topics.
 */
interface DrupalIncludedData {
id: string;
attributes?: {
// Read when resolving a topic reference.
name: string;
// Read when resolving the author reference.
title: string;
};
}

/**
 * Fetches one page of results from the given URL.
 *
 * @param url - URL of the page to request.
 * @returns The link to the next page (if the response has one), the entries
 *   of the response's `data` field and the entries of its `included` field.
 * @throws Rethrows any error raised while fetching or reading the response,
 *   after writing it to stderr.
 */
async function getDrupalPosts(url: string) {
  console.log(`Importing from ${url}`);

  try {
    const { data: page } = await axios.get(url);
    const { links, data: posts, included: includedData } = page;

    return { nextURL: links?.next?.href, posts, includedData };
  } catch (err) {
    console.error(err);
    throw err;
  }
}

/**
 * Imports posts from a Drupal site into Google Docs and connects them to a
 * PCC site.
 *
 * Steps:
 *  1. Validate `baseUrl` and log in with the Drive file scope.
 *  2. Create a Drive folder that will hold the imported documents.
 *  3. Page through the results starting at `baseUrl` (up to 1000 pages),
 *     requesting `field_author` and `field_topics` as included data.
 *  4. Ensure the `drupalId` and `author` metadata fields exist on the site.
 *  5. For every post that has a body, create a Google Doc from its HTML and
 *     update the document with the title, topic names and metadata.
 *
 * Posts without a body are skipped and are not counted as imported.
 *
 * @param baseUrl - URL of the first results page.
 * @param siteId - Id of the PCC site the documents are connected to.
 * @param verbose - Forwarded to `AddOnApiHelper.updateDocument`.
 */
export const importFromDrupal = errorHandler<ImportParams>(
  async ({ baseUrl, siteId, verbose }: ImportParams) => {
    const logger = new Logger();

    if (baseUrl) {
      try {
        new URL(baseUrl);
      } catch (_err) {
        logger.error(
          chalk.red(
            `ERROR: Value provided for \`baseUrl\` is not a valid URL. `,
          ),
        );
        exit(1);
      }
    }

    await login(["https://www.googleapis.com/auth/drive.file"]);
    const authDetails = await getLocalAuthDetails();

    if (!authDetails) {
      logger.error(chalk.red(`ERROR: Failed to retrieve login details. `));
      exit(1);
    }

    const oauth2Client = new OAuth2Client();
    oauth2Client.setCredentials(authDetails);
    const drive = google.drive({
      version: "v3",
      auth: oauth2Client,
    });

    // Create the parent folder. A failed request is logged and leaves `folder`
    // undefined so that the check below reports it and exits, instead of
    // crashing on a property access of an undefined response.
    let folder: drive_v3.Schema$File | undefined;
    try {
      const folderRes = (await drive.files.create({
        fields: "id,name",
        requestBody: {
          name: `PCC Import from Drupal on ${new Date().toLocaleDateString()} unique id: ${randomUUID()}`,
          mimeType: "application/vnd.google-apps.folder",
        },
      })) as GaxiosResponse<drive_v3.Schema$File>;
      folder = folderRes.data;
    } catch (e) {
      console.error(e);
    }

    const folderId = folder?.id;

    if (folder == null || folderId == null) {
      logger.error(
        chalk.red(
          `Failed to create parent folder which we would have imported posts into`,
        ),
      );
      exit(1);
    }

    const folderName = folder.name;

    // Get results.
    const MAX_PAGES = 1000;
    const { url, query } = queryString.parseUrl(baseUrl);
    query.include = "field_author,field_topics";
    const allPosts: DrupalPost[] = [];
    // Included entities indexed by id; the first occurrence of an id wins.
    const includedById = new Map<string, DrupalIncludedData>();
    let nextURL: string | undefined = queryString.stringifyUrl({ url, query });
    // Number of pages actually fetched, so the summary below is accurate.
    let pagesFetched = 0;

    while (nextURL != null && pagesFetched < MAX_PAGES) {
      const drupalData = await getDrupalPosts(nextURL);
      pagesFetched++;
      nextURL = drupalData.nextURL;

      if (drupalData.posts?.length) {
        allPosts.push(...drupalData.posts);
      }

      if (drupalData.includedData?.length) {
        for (const item of drupalData.includedData as DrupalIncludedData[]) {
          if (!includedById.has(item.id)) {
            includedById.set(item.id, item);
          }
        }
      }
    }

    logger.log(
      chalk.green(
        `Retrieved ${allPosts.length} posts after ${pagesFetched} pages`,
      ),
    );

    // Ensure that these metadata fields exist.
    await AddOnApiHelper.addSiteMetadataField(
      siteId,
      "blog",
      "drupalId",
      "string",
    );
    await AddOnApiHelper.addSiteMetadataField(
      siteId,
      "blog",
      "author",
      "string",
    );

    // Counts only the posts that were fully imported (skipped posts excluded).
    let importedCount = 0;

    await Promise.map(
      allPosts,
      async (post) => {
        if (post?.attributes?.body == null) {
          console.log("Skipping post", Object.keys(post));
          return;
        }

        const { title, body } = post.attributes;

        // The author relationship may be absent or empty in the response, so
        // it is read defensively; the author is then left undefined.
        const authorId: string | undefined =
          post.relationships?.field_author?.data?.id;
        const authorName: string | undefined =
          authorId != null
            ? includedById.get(authorId)?.attributes?.title
            : undefined;

        // Create the google doc.
        const res = (await drive.files.create({
          requestBody: {
            // Name from the article.
            name: title,
            mimeType: "application/vnd.google-apps.document",
            parents: [folderId],
          },
          media: {
            mimeType: "text/html",
            body: body.processed,
          },
        })) as GaxiosResponse<drive_v3.Schema$File>;
        const fileId = res.data.id;

        if (!fileId) {
          throw new Error(`Failed to create file for ${title}`);
        }

        // Add it to the PCC site.
        await AddOnApiHelper.getDocument(fileId, true);

        // Resolve topic references to their names, dropping unresolved ones.
        const topicNames =
          post.relationships?.field_topics?.data
            ?.map(
              (topic: DrupalTopic) =>
                includedById.get(topic.id)?.attributes?.name,
            )
            .filter((x: string | undefined): x is string => x != null) ?? [];

        try {
          await AddOnApiHelper.updateDocument(
            fileId,
            siteId,
            title,
            topicNames,
            {
              author: authorName,
              drupalId: post.id,
            },
            verbose,
          );
        } catch (e) {
          console.error(e instanceof AxiosError ? e.response?.data : e);
          throw e;
        }

        importedCount++;
      },
      {
        concurrency: 20,
      },
    );

    logger.log(
      chalk.green(
        `Successfully imported ${importedCount} documents into ${folderName}`,
      ),
    );
  },
);
21 changes: 15 additions & 6 deletions packages/cli/src/cli/commands/login.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,26 @@ nunjucks.configure({ autoescape: true });

const OAUTH_SCOPES = ["https://www.googleapis.com/auth/userinfo.email"];

function login(): Promise<void> {
function login(extraScopes: string[]): Promise<void> {
return new Promise(
// eslint-disable-next-line no-async-promise-executor -- Handling promise rejection in the executor
async (resolve, reject) => {
const spinner = ora("Logging you in...").start();
try {
const authData = await getLocalAuthDetails();
if (authData) {
const jwtPayload = parseJwt(authData.id_token as string);
spinner.succeed(`You are already logged in as ${jwtPayload.email}.`);
return;
const scopes = authData.scope?.split(" ");

if (
!extraScopes?.length ||
extraScopes.find((x) => scopes?.includes(x))
) {
const jwtPayload = parseJwt(authData.id_token as string);
spinner.succeed(
`You are already logged in as ${jwtPayload.email}.`,
);
return resolve();
}
}

const oAuth2Client = new OAuth2Client({
Expand All @@ -41,7 +50,7 @@ function login(): Promise<void> {
// Generate the url that will be used for the consent dialog.
const authorizeUrl = oAuth2Client.generateAuthUrl({
access_type: "offline",
scope: OAUTH_SCOPES,
scope: [...OAUTH_SCOPES, ...extraScopes],
});

const server = http.createServer(async (req, res) => {
Expand Down Expand Up @@ -92,7 +101,7 @@ function login(): Promise<void> {
},
);
}
export default errorHandler<void>(login);
export default errorHandler<string[]>(login);
export const LOGIN_EXAMPLES = [
{ description: "Login the user", command: "$0 login" },
];
Loading
Loading